From da37737e71b200ef348ed9fe105c366d20be04b3 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Fri, 11 Apr 2025 11:34:22 -0700 Subject: [PATCH 01/39] Vectorize report graphics --- pyproject.toml | 2 +- src/finemo/evaluation.py | 35 ++++++++++++++++++++++---------- src/finemo/main.py | 6 ++---- src/finemo/templates/report.html | 20 +++++++++--------- 4 files changed, 37 insertions(+), 26 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 111f8c9..0b33113 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "finemo" description = "Identification of regulatory elements from neural network contribution scores for DNA." keywords = ["deep learning", "genomics"] -version = "0.30" +version = "0.31" readme = "README.md" license = {file = "LICENSE"} authors = [ diff --git a/src/finemo/evaluation.py b/src/finemo/evaluation.py index 8fecafc..a121fb5 100644 --- a/src/finemo/evaluation.py +++ b/src/finemo/evaluation.py @@ -77,8 +77,10 @@ def plot_hit_distributions(occ_df, motif_names, plot_dir): y[unique] = freq ax.bar(x, y) - output_path = os.path.join(motifs_dir, f"{m}.png") - plt.savefig(output_path, dpi=300) + output_path_png = os.path.join(motifs_dir, f"{m}.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(motifs_dir, f"{m}.svg") + plt.savefig(output_path_svg) plt.close(fig) @@ -95,13 +97,15 @@ def plot_hit_distributions(occ_df, motif_names, plot_dir): ax.set_xlabel("Motifs per peak") ax.set_ylabel("Frequency") - output_path = os.path.join(plot_dir, "total_hit_distribution.png") - plt.savefig(output_path, dpi=300) + output_path_png = os.path.join(plot_dir, "total_hit_distribution.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(plot_dir, "total_hit_distribution.svg") + plt.savefig(output_path_svg, dpi=300) plt.close(fig) -def plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_path): +def 
plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_dir): """ Plots a simple indicator heatmap of the motifs in each peak. """ @@ -122,7 +126,10 @@ def plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_path) ax.set_xlabel("Motif i") ax.set_ylabel("Motif j") - plt.savefig(output_path, dpi=300) + output_path_png = os.path.join(output_dir, "motif_cooocurrence.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(output_dir, "motif_cooocurrence.svg") + plt.savefig(output_path_svg) plt.close() @@ -393,8 +400,6 @@ def plot_cwms(cwms, trim_bounds, out_dir, alphabet=LOGO_ALPHABET, colors=LOGO_CO motif_dir = os.path.join(out_dir, m) os.makedirs(motif_dir, exist_ok=True) for cwm_type, cwm in v.items(): - output_path = os.path.join(motif_dir, f"{cwm_type}.png") - fig, ax = plt.subplots(figsize=(10,2)) plot_logo(ax, cwm, alphabet, colors=colors, font_props=font, shade_bounds=trim_bounds[m][cwm_type]) @@ -402,11 +407,15 @@ def plot_cwms(cwms, trim_bounds, out_dir, alphabet=LOGO_ALPHABET, colors=LOGO_CO for name, spine in ax.spines.items(): spine.set_visible(False) - plt.savefig(output_path, dpi=100) + output_path_png = os.path.join(motif_dir, f"{cwm_type}.png") + plt.savefig(output_path_png, dpi=100) + output_path_svg = os.path.join(motif_dir, f"{cwm_type}.svg") + plt.savefig(output_path_svg) + plt.close(fig) -def plot_hit_vs_seqlet_counts(recall_data, output_path): +def plot_hit_vs_seqlet_counts(recall_data, output_dir): x = [] y = [] m = [] @@ -430,7 +439,11 @@ def plot_hit_vs_seqlet_counts(recall_data, output_path): ax.set_xlabel("Hits per motif") ax.set_ylabel("Seqlets per motif") - plt.savefig(output_path, dpi=300) + output_path_png = os.path.join(output_dir, "hit_vs_seqlet_counts.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(output_dir, "hit_vs_seqlet_counts.svg") + plt.savefig(output_path_svg) + plt.close() diff --git a/src/finemo/main.py b/src/finemo/main.py index 
77ce256..fd031fd 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -216,8 +216,7 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p evaluation.plot_hit_distributions(occ_df, motif_names, out_dir) - coooc_path = os.path.join(out_dir, "motif_cooocurrence.png") - evaluation.plot_peak_motif_indicator_heatmap(coooc, motif_names, coooc_path) + evaluation.plot_peak_motif_indicator_heatmap(coooc, motif_names, out_dir) plot_dir = os.path.join(out_dir, "CWMs") evaluation.plot_cwms(cwms, trim_bounds, plot_dir) @@ -227,8 +226,7 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p seqlets_path = os.path.join(out_dir, "seqlets.tsv") data_io.write_modisco_seqlets(seqlets_df, seqlets_path) - plot_path = os.path.join(out_dir, "hit_vs_seqlet_counts.png") - evaluation.plot_hit_vs_seqlet_counts(report_data, plot_path) + evaluation.plot_hit_vs_seqlet_counts(report_data, out_dir) report_path = os.path.join(out_dir, "report.html") evaluation.write_report(report_df, motif_names, report_path, compute_recall, seqlets_df is not None) diff --git a/src/finemo/templates/report.html b/src/finemo/templates/report.html index dff2f92..2b58a7e 100644 --- a/src/finemo/templates/report.html +++ b/src/finemo/templates/report.html @@ -201,7 +201,7 @@

Hit vs. seqlet counts

The dashed line is the identity line. When comparing a shared set of regions, the hit counts should be mostly greater than the corresponding seqlet counts, since TF-MoDISco stringently filters seqlets and usually uses a smaller input window.

- + {% endif %}

Hit and seqlet motif comparisons

@@ -309,13 +309,13 @@

Hit and seqlet motif comparisons

{{ item.num_seqlets_only }} {{ item.num_hits_restricted_only }} {% endif %} - - - - + + + + {% if compute_recall %} - - + + {% endif %} {% endfor %} @@ -332,7 +332,7 @@

Overall distribution of hits per peak

This plot shows the distribution of hit counts per peak for any motif. The number of peaks with no hits should be near zero.

- +

Per-motif distributions of hits per peak

@@ -349,7 +349,7 @@

Per-motif distributions of hits per peak

{% for m in motif_names %} {{ m }} - + {% endfor %} @@ -361,7 +361,7 @@

Motif co-occurrence

The color intensity here represents the cosine similarity between the motifs' occurrence across peaks, where occurence is defined as the presence of a hit for a motif in a peak.

- + From ac1112ac201a123d68f94742a762e90b865adf0d Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sat, 12 Apr 2025 06:19:44 -0700 Subject: [PATCH 02/39] Additional hit distribution plots --- src/finemo/evaluation.py | 50 ++++++++++++++++++++++++++++++-- src/finemo/main.py | 4 ++- src/finemo/templates/report.html | 37 +++++++++++++++-------- 3 files changed, 75 insertions(+), 16 deletions(-) diff --git a/src/finemo/evaluation.py b/src/finemo/evaluation.py index a121fb5..4dddd38 100644 --- a/src/finemo/evaluation.py +++ b/src/finemo/evaluation.py @@ -62,12 +62,56 @@ def get_motif_occurences(hits_df, motif_names): return occ_df, coocc -def plot_hit_distributions(occ_df, motif_names, plot_dir): +def plot_hit_stat_distributions(hits_df, motif_names, plot_dir): + hits_df = hits_df.collect() + hits_by_motif = hits_df.partition_by("motif_name", as_dict=True) + dummy_df = hits_df.clear() + + motifs_dir = os.path.join(plot_dir, "motif_stat_distributions") + os.makedirs(motifs_dir, exist_ok=True) + for m in motif_names: + hits = hits_by_motif.get((m,), dummy_df) + coefficients = hits.get_column("hit_coefficient_global").to_numpy() + similarities = hits.get_column("hit_similarity").to_numpy() + importances = hits.get_column("hit_importance").to_numpy() + + fig, ax = plt.subplots(figsize=(5, 2)) + + ax.hist(coefficients, bins=50) + + output_path_png = os.path.join(motifs_dir, f"{m}_coefficients.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(motifs_dir, f"{m}_coefficients.svg") + plt.savefig(output_path_svg) + plt.close(fig) + + fig, ax = plt.subplots(figsize=(5, 2)) + + ax.hist(similarities, bins=50) + + output_path_png = os.path.join(motifs_dir, f"{m}_similarities.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(motifs_dir, f"{m}_similarities.svg") + plt.savefig(output_path_svg) + plt.close(fig) + + fig, ax = plt.subplots(figsize=(5, 2)) + + ax.hist(importances, bins=50) + + output_path_png = 
os.path.join(motifs_dir, f"{m}_importances.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(motifs_dir, f"{m}_importances.svg") + plt.savefig(output_path_svg) + plt.close(fig) + + +def plot_hit_peak_distributions(occ_df, motif_names, plot_dir): motifs_dir = os.path.join(plot_dir, "motif_hit_distributions") os.makedirs(motifs_dir, exist_ok=True) for m in motif_names: - fig, ax = plt.subplots(figsize=(6, 2)) + fig, ax = plt.subplots(figsize=(5, 2)) unique, counts = np.unique(occ_df.get_column(m), return_counts=True) freq = counts / counts.sum() @@ -94,7 +138,7 @@ def plot_hit_distributions(occ_df, motif_names, plot_dir): y[unique] = freq ax.bar(x, y) - ax.set_xlabel("Motifs per peak") + ax.set_xlabel("Total hits per region") ax.set_ylabel("Frequency") output_path_png = os.path.join(plot_dir, "total_hit_distribution.png") diff --git a/src/finemo/main.py b/src/finemo/main.py index fd031fd..caff09e 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -214,7 +214,9 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p data_io.write_report_data(report_df, cwms, out_dir) - evaluation.plot_hit_distributions(occ_df, motif_names, out_dir) + evaluation.plot_hit_stat_distributions(hits_df, motif_names, out_dir) + + evaluation.plot_hit_peak_distributions(occ_df, motif_names, out_dir) evaluation.plot_peak_motif_indicator_heatmap(coooc, motif_names, out_dir) diff --git a/src/finemo/templates/report.html b/src/finemo/templates/report.html index 2b58a7e..0b2ed57 100644 --- a/src/finemo/templates/report.html +++ b/src/finemo/templates/report.html @@ -170,6 +170,7 @@ } .distplot img { + max-width: unset; margin-bottom: 0 } @@ -322,34 +323,46 @@

Hit and seqlet motif comparisons

-

Hit distributions

+

Hit statistic distributions

- The following figures visualize the distribution of hits across motifs and peaks. + The following figures visualize the distribution of hit statistics across motifs and regions.

-

Overall distribution of hits per peak

+

Overall distribution of hit counts per region

- This plot shows the distribution of hit counts per peak for any motif. - The number of peaks with no hits should be near zero. + This plot shows the distribution of hit counts per region for any motif. + The number of regions with no hits should be near zero.

-

Per-motif distributions of hits per peak

+

Per-motif distributions of hit statistics

- These plots show the distribution of hit counts per peak for each motif. + These plots show the distribution of hit statistics for each motif, specifically: +

- + + + + {% for m in motif_names %} - + + + + {% endfor %} @@ -357,9 +370,9 @@

Per-motif distributions of hits per peak

Motif co-occurrence

- This heatmap shows the co-occurrence of motifs across peaks. - The color intensity here represents the cosine similarity between the motifs' occurrence across peaks, - where occurence is defined as the presence of a hit for a motif in a peak. + This heatmap shows the co-occurrence of motifs across regions. + The color intensity here represents the cosine similarity between the motifs' occurrence across regions, + where occurence is defined as the presence of a hit for a motif in a region.

From 20853997f44ab1297e13ce9c934c5b2edd8f471e Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 13 Apr 2025 16:17:24 -0700 Subject: [PATCH 03/39] Utility for collapsing overlapping hits --- environment.yml | 1 + pyproject.toml | 3 +- src/finemo/data_io.py | 13 +++++-- src/finemo/evaluation.py | 6 +-- src/finemo/main.py | 24 ++++++++++++ src/finemo/postprocessing.py | 72 ++++++++++++++++++++++++++++++++++++ 6 files changed, 111 insertions(+), 8 deletions(-) create mode 100644 src/finemo/postprocessing.py diff --git a/environment.yml b/environment.yml index 01e7af9..98e4cde 100644 --- a/environment.yml +++ b/environment.yml @@ -8,6 +8,7 @@ dependencies: - pytorch=2.5.1 - pytorch-cuda=12.4 - python=3.11 + - numba=0.61.2 - numpy=2.2.0 - scipy=1.14.1 - polars=1.17.1 diff --git a/pyproject.toml b/pyproject.toml index 0b33113..58ddb2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "finemo" description = "Identification of regulatory elements from neural network contribution scores for DNA." 
keywords = ["deep learning", "genomics"] -version = "0.31" +version = "0.32" readme = "README.md" license = {file = "LICENSE"} authors = [ @@ -17,6 +17,7 @@ dependencies = [ "numpy", "scipy", "torch", + "numba", "polars>=1.0", "matplotlib", "h5py", diff --git a/src/finemo/data_io.py b/src/finemo/data_io.py index d5b119a..cd74fac 100644 --- a/src/finemo/data_io.py +++ b/src/finemo/data_io.py @@ -209,7 +209,6 @@ def write_regions_npz(sequences, contributions, out_path, peaks_df=None): chr=chr_arr, chr_id=chr_id_arr, start=start_arr, peak_id=peak_id_arr, peak_name=peak_name_arr) - def trim_motif(cwm, trim_threshold): """ Adapted from https://github.com/jmschrei/tfmodisco-lite/blob/570535ee5ccf43d670e898d92d63af43d68c38c5/modiscolite/report.py#L213-L236 @@ -439,18 +438,24 @@ def write_modisco_seqlets(seqlets_df, out_path): "strand": pl.String, "peak_name": pl.String, "peak_id": pl.UInt32, - } +HITS_COLLAPSED_DTYPES = HITS_DTYPES | {"is_primary": pl.UInt32} + -def load_hits(hits_path, lazy=False): +def load_hits(hits_path, lazy=False, schema=HITS_DTYPES): hits_df = ( - pl.scan_csv(hits_path, separator='\t', quote_char=None, schema=HITS_DTYPES) + pl.scan_csv(hits_path, separator='\t', quote_char=None, schema=schema) .with_columns(pl.lit(1).alias("count")) ) return hits_df if lazy else hits_df.collect() +def write_hits_processed(hits_df, out_path, schema=HITS_DTYPES): + hits_df = hits_df.select(schema.keys()) + hits_df.write_csv(out_path, separator="\t") + + def write_hits(hits_df, peaks_df, motifs_df, qc_df, out_dir, motif_width): os.makedirs(out_dir, exist_ok=True) out_path_tsv = os.path.join(out_dir, "hits.tsv") diff --git a/src/finemo/evaluation.py b/src/finemo/evaluation.py index 4dddd38..72358c5 100644 --- a/src/finemo/evaluation.py +++ b/src/finemo/evaluation.py @@ -77,7 +77,7 @@ def plot_hit_stat_distributions(hits_df, motif_names, plot_dir): fig, ax = plt.subplots(figsize=(5, 2)) - ax.hist(coefficients, bins=50) + ax.hist(coefficients, bins=50, density=True) 
output_path_png = os.path.join(motifs_dir, f"{m}_coefficients.png") plt.savefig(output_path_png, dpi=300) @@ -87,7 +87,7 @@ def plot_hit_stat_distributions(hits_df, motif_names, plot_dir): fig, ax = plt.subplots(figsize=(5, 2)) - ax.hist(similarities, bins=50) + ax.hist(similarities, bins=50, density=True) output_path_png = os.path.join(motifs_dir, f"{m}_similarities.png") plt.savefig(output_path_png, dpi=300) @@ -97,7 +97,7 @@ def plot_hit_stat_distributions(hits_df, motif_names, plot_dir): fig, ax = plt.subplots(figsize=(5, 2)) - ax.hist(importances, bins=50) + ax.hist(importances, bins=50, density=True) output_path_png = os.path.join(motifs_dir, f"{m}_importances.png") plt.savefig(output_path_png, dpi=300) diff --git a/src/finemo/main.py b/src/finemo/main.py index caff09e..6bae4ff 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -234,6 +234,15 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p evaluation.write_report(report_df, motif_names, report_path, compute_recall, seqlets_df is not None) +def collapse_hits(hits_path, out_path, overlap): + from . 
import postprocessing + + hits_df = data_io.load_hits(hits_path, lazy=False) + hits_collapsed_df = postprocessing.collapse_hits(hits_df, overlap) + + data_io.write_hits_processed(hits_collapsed_df, out_path, schema=data_io.HITS_COLLAPSED_DTYPES) + + def cli(): parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(required=True, dest='cmd') @@ -418,6 +427,17 @@ def cli(): help="Do not compute motif recall metrics.") report_parser.add_argument("-s", "--no-seqlets", action='store_true', help="DEPRECATED: Please omit the `--modisco-h5` argument instead.") + + + collapse_hits_parser = subparsers.add_parser("collapse-hits", formatter_class=argparse.ArgumentDefaultsHelpFormatter, + help="Identify best hit among sets of overlapping hits by motif similarity.") + + collapse_hits_parser.add_argument("-i", "--hits", type=str, required=True, + help="The `hits.tsv` or `hits_unique.tsv` file from `call-hits`.") + collapse_hits_parser.add_argument("-o", "--out-path", type=str, required=True, + help="The path to the output .tsv file with an additional \"is_primary\" column.") + collapse_hits_parser.add_argument("-O", "--overlap", type=int, default=3, + help="The minimum number of base pairs to consider as overlapping.") args = parser.parse_args() @@ -458,3 +478,7 @@ def cli(): report(args.regions, args.hits, args.modisco_h5, args.peaks, args.motifs_include, args.motif_names, args.out_dir, args.modisco_region_width, args.cwm_trim_threshold, not args.no_recall, not args.no_seqlets) + + elif args.cmd == "collapse-hits": + collapse_hits(args.hits, args.out_path, args.overlap) + diff --git a/src/finemo/postprocessing.py b/src/finemo/postprocessing.py new file mode 100644 index 0000000..be312d0 --- /dev/null +++ b/src/finemo/postprocessing.py @@ -0,0 +1,72 @@ +import heapq + +import numpy as np +import polars as pl +from numba import njit +from numba.types import Array, uint32, int32, float32 + +@njit( + uint32[:]( + Array(uint32, 1, 'C', readonly=True), + Array(int32, 1, 
'C', readonly=True), + Array(int32, 1, 'C', readonly=True), + Array(float32, 1, 'C', readonly=True) + ) +) +def _collapse_hits(chrom_ids, starts, ends, similarities): + n = chrom_ids.shape[0] + out = np.ones(n, dtype=np.uint32) + heap = [(np.uint32(0), np.int32(0), -1) for _ in range(0)] + + for i in range(n): + chrom_new = chrom_ids[i] + start_new = starts[i] + end_new = ends[i] + sim_new = similarities[i] + + while heap and heap[0] < (chrom_new, start_new, -1): + heapq.heappop(heap) + + for _, _, idx in heap: + cmp = sim_new > similarities[idx] + out[idx] &= cmp + out[i] &= not cmp + + heapq.heappush(heap, (chrom_new, end_new, i)) + + return out + + +def collapse_hits(hits_df, overlap): + chroms = hits_df["chr"].unique(maintain_order=True) + + if not chroms.is_empty(): + chrom_to_id = { + chrom: i for i, chrom in enumerate(chroms) + } + df = hits_df.select( + chrom_id=pl.col("chr").replace_strict(chrom_to_id, return_dtype=pl.UInt32), + start_trim=pl.col("start") * 2 + overlap, + end_trim=pl.col("end") * 2 - overlap, + similarity=pl.col("hit_similarity") + ) + else: + df = hits_df.select( + chrom_id=pl.col("peak_id"), + start_trim=pl.col("start") * 2 + overlap, + end_trim=pl.col("end") * 2 - overlap, + similarity=pl.col("hit_similarity") + ) + + df = df.rechunk() + chrom_ids = df["chrom_id"].to_numpy(allow_copy=False) + starts = df["start_trim"].to_numpy(allow_copy=False) + ends = df["end_trim"].to_numpy(allow_copy=False) + similarities = df["similarity"].to_numpy(allow_copy=False) + is_primary = _collapse_hits(chrom_ids, starts, ends, similarities) + + df_out = hits_df.with_columns( + is_primary=pl.Series(is_primary, dtype=pl.UInt32) + ) + + return df_out From c4560942820a9183c0674e77dbc586fbfacf7c20 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 13 Apr 2025 16:27:21 -0700 Subject: [PATCH 04/39] Documentation for `collapse-hits` --- README.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 
138c738..52bb243 100644 --- a/README.md +++ b/README.md @@ -203,7 +203,7 @@ Usage: `finemo call-hits -r -m -o [-p ] - Legacy TFMoDISCo H5 files can be updated to the newer TFMoDISCo-lite format with the `modisco convert` command found in the [tfmodisco-lite](https://github.com/jmschrei/tfmodisco-lite/tree/main) package. - The hit-calling thresholding procedure is scale-invariant. That is, whether a position is assigned a hit depends on the shapes of the motif CWM and the contribution scores, not the absolute magnitude of the scores. If you wish to prioritize hits based on the magnitude of the contribution scores, set a per-motif rank threshold the `hit_coefficient_global` field in the `hits.tsv` file, which captures both the absolute importance and the closeness of match. -### Output reporting +### Output reporting and post-processing #### `finemo report` @@ -220,12 +220,19 @@ Usage: `finemo report -r -H -o [-m ] [-W - `-W/--modisco-region-width`: The width of the region around each peak summit used by tfmodisco-lite. Default is 400. - `-n/--no-recall`: Do not compute motif recall metrics. Default is False. -#### Additional outputs +Additional report outputs: -`motif_report.tsv`: Statistics on the distribution of hits per motif. The columns and values correspond to those in the HTML report's table. +- `motif_report.tsv`: Statistics on the distribution of hits per motif. The columns and values correspond to those in the HTML report's table. +- `motif_occurrences.tsv`: The number of hits of each motif in each input region. Also includes the total number of hits per region. +- `CWMs`: A directory containing visualizations of motif CWMs, as well as corresponding tables with numerical CWM values. +- `seqlets.tsv`: tf-modisco seqlet coordinates for each motif in each region. Only generated if `-m/--modisco-h5` is provided. -`motif_occurrences.tsv`: The number of hits of each motif in each input region. Also includes the total number of hits per region. 
+#### `finemo collapse-hits` -`CWMs`: A directory containing visualizations of motif CWMs, as well as corresponding tables with numerical CWM values. +Identify the best hits by motif similarity within groups of overlapping hits. Adds a 0/1 `is_primary` column to the `hits.tsv` file, indicating whether a hit is the best hit in its group. This command does not utilize the GPU. -`seqlets.tsv`: tf-modisco seqlet coordinates for each motif in each region. Only generated if `-m/--modisco-h5` is provided. +Usage: `usage: finemo collapse-hits [-h] -i -o [-O ]` + +- `-i/--hits`: The path to the input hits file. This should be the `hits.tsv` or `hits_unique.tsv` file generated by the `finemo call-hits` command. +- `-o/--out-path`: The path to the output file. This will be a copy of the input file with an additional `is_primary` column. +- `-O/--overlap`: The minimum overlap (in base pairs) required for two hits to be considered overlapping. Default is 3 bp. From 5bc44d6319a18cf387606bc44b9666e59799e87b Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 13 Apr 2025 16:32:34 -0700 Subject: [PATCH 05/39] Cache jit outputs --- README.md | 2 +- src/finemo/postprocessing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 52bb243..df8c798 100644 --- a/README.md +++ b/README.md @@ -231,7 +231,7 @@ Additional report outputs: Identify the best hits by motif similarity within groups of overlapping hits. Adds a 0/1 `is_primary` column to the `hits.tsv` file, indicating whether a hit is the best hit in its group. This command does not utilize the GPU. -Usage: `usage: finemo collapse-hits [-h] -i -o [-O ]` +Usage: `usage: finemo collapse-hits -i -o [-O ]` - `-i/--hits`: The path to the input hits file. This should be the `hits.tsv` or `hits_unique.tsv` file generated by the `finemo call-hits` command. - `-o/--out-path`: The path to the output file. This will be a copy of the input file with an additional `is_primary` column. 
diff --git a/src/finemo/postprocessing.py b/src/finemo/postprocessing.py index be312d0..296758a 100644 --- a/src/finemo/postprocessing.py +++ b/src/finemo/postprocessing.py @@ -11,7 +11,7 @@ Array(int32, 1, 'C', readonly=True), Array(int32, 1, 'C', readonly=True), Array(float32, 1, 'C', readonly=True) - ) + ), cache=True ) def _collapse_hits(chrom_ids, starts, ends, similarities): n = chrom_ids.shape[0] From 3618533f50bf854f46df0a08d5d381e38b0a1c62 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 13 Apr 2025 18:48:17 -0700 Subject: [PATCH 06/39] Hit intersection utility --- README.md | 10 ++++++++++ pyproject.toml | 2 +- src/finemo/data_io.py | 3 ++- src/finemo/main.py | 25 ++++++++++++++++++++++++- src/finemo/postprocessing.py | 29 +++++++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index df8c798..2e30598 100644 --- a/README.md +++ b/README.md @@ -236,3 +236,13 @@ Usage: `usage: finemo collapse-hits -i -o [-O ]` - `-i/--hits`: The path to the input hits file. This should be the `hits.tsv` or `hits_unique.tsv` file generated by the `finemo call-hits` command. - `-o/--out-path`: The path to the output file. This will be a copy of the input file with an additional `is_primary` column. - `-O/--overlap`: The minimum overlap (in base pairs) required for two hits to be considered overlapping. Default is 3 bp. + +#### `finemo intersect-hits` + +Find the intersection of hits across multiple runs. This command does not utilize the GPU. + +Usage: `finemo intersect-hits -i -o [-r]` + +- `-i/--hits`: The path to one or more input hits file. This should be the `hits.tsv` or `hits_unique.tsv` file generated by the `finemo call-hits` command. +- `-o/--out-path`: The path to the output file. Reoccuring columns are suffixed with the positional index of the input file (e.g. `hit_importance_1`), with the exception of index 0. 
+- `-r/--relaxed`: By default, the intersection assumes consistent input region definitions (name and coordinates) and motif trimming across runs. In contrast, this relaxed intersection criteria uses only motif names and untrimmed hit coordinates. However, this is not suitable when hit genomic coordinates are unknown (e.g. when using `finemo call-hits` with `-p/--peaks`). Default is False. diff --git a/pyproject.toml b/pyproject.toml index 58ddb2e..9e9d76a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "finemo" description = "Identification of regulatory elements from neural network contribution scores for DNA." keywords = ["deep learning", "genomics"] -version = "0.32" +version = "0.33" readme = "README.md" license = {file = "LICENSE"} authors = [ diff --git a/src/finemo/data_io.py b/src/finemo/data_io.py index cd74fac..35e36e9 100644 --- a/src/finemo/data_io.py +++ b/src/finemo/data_io.py @@ -452,7 +452,8 @@ def load_hits(hits_path, lazy=False, schema=HITS_DTYPES): def write_hits_processed(hits_df, out_path, schema=HITS_DTYPES): - hits_df = hits_df.select(schema.keys()) + if schema is not None: + hits_df = hits_df.select(schema.keys()) hits_df.write_csv(out_path, separator="\t") diff --git a/src/finemo/main.py b/src/finemo/main.py index 6bae4ff..1fed513 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -243,6 +243,15 @@ def collapse_hits(hits_path, out_path, overlap): data_io.write_hits_processed(hits_collapsed_df, out_path, schema=data_io.HITS_COLLAPSED_DTYPES) +def intersect_hits(hits_paths, out_path, relaxed): + from . 
import postprocessing + + hits_dfs = [data_io.load_hits(hits_path, lazy=False) for hits_path in hits_paths] + hits_df = postprocessing.intersect_hits(hits_dfs, relaxed) + + data_io.write_hits_processed(hits_df, out_path, schema=None) + + def cli(): parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(required=True, dest='cmd') @@ -430,7 +439,7 @@ def cli(): collapse_hits_parser = subparsers.add_parser("collapse-hits", formatter_class=argparse.ArgumentDefaultsHelpFormatter, - help="Identify best hit among sets of overlapping hits by motif similarity.") + help="Identify best hit by motif similarity among sets of overlapping hits.") collapse_hits_parser.add_argument("-i", "--hits", type=str, required=True, help="The `hits.tsv` or `hits_unique.tsv` file from `call-hits`.") @@ -440,6 +449,17 @@ def cli(): help="The minimum number of base pairs to consider as overlapping.") + intersect_hits_parser = subparsers.add_parser("intersect-hits", formatter_class=argparse.ArgumentDefaultsHelpFormatter, + help="Intersect hits across multiple runs.") + + intersect_hits_parser.add_argument("-i", "--hits", type=str, required=True, nargs='+', + help="One or more hits.tsv or hits_unique.tsv files, with paths delimited by whitespace.") + intersect_hits_parser.add_argument("-o", "--out-path", type=str, required=True, + help="The path to the output .tsv file. Duplicate columns are suffixed with the positional index of the input file.") + intersect_hits_parser.add_argument("-r", "--relaxed", action='store_true', + help="Use relaxed intersection criteria, using only motif names and untrimmed coordinates. By default, the intersection assumes consistent region definitions and motif trimming. 
This option is not recommended if genomic coordinates are unavailable.") + + args = parser.parse_args() if args.cmd == "extract-regions-bw": @@ -482,3 +502,6 @@ def cli(): elif args.cmd == "collapse-hits": collapse_hits(args.hits, args.out_path, args.overlap) + elif args.cmd == "intersect-hits": + intersect_hits(args.hits, args.out_path, args.relaxed) + diff --git a/src/finemo/postprocessing.py b/src/finemo/postprocessing.py index 296758a..364f371 100644 --- a/src/finemo/postprocessing.py +++ b/src/finemo/postprocessing.py @@ -70,3 +70,32 @@ def collapse_hits(hits_df, overlap): ) return df_out + + +def intersect_hits(hits_dfs, relaxed): + if relaxed: + join_cols = [ + "chr", "start_untrimmed", "end_untrimmed", + "motif_name", "strand" + ] + else: + join_cols = [ + "chr", "start", "end", "start_untrimmed", "end_untrimmed", + "motif_name", "strand", "peak_name", "peak_id" + ] + + if len(hits_dfs) < 1: + raise ValueError("At least one hits dataframe required") + + hits_df = hits_dfs[0] + for i in range(1, len(hits_dfs)): + hits_df = hits_df.join( + hits_dfs[i], + on=join_cols, + how="inner", + suffix=f"_{i}", + join_nulls=True, + coalesce=True + ) + + return hits_df \ No newline at end of file From 0241df390b42c05d49084f58aa66d8f4bede38cb Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 14 Apr 2025 12:12:46 -0700 Subject: [PATCH 07/39] Additional motif trimming options --- README.md | 2 +- pyproject.toml | 2 +- src/finemo/data_io.py | 31 +++++++++++++++++++++++++++---- src/finemo/main.py | 39 +++++++++++++++++++++++++++------------ 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 2e30598..c71ded2 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ Usage: `finemo call-hits -r -m -o [-p ] - `-r/--regions`: A `.npz` file of input sequences, contributions, and coordinates. Created with a `finemo extract-regions-*` command. - `-m/--modisco-h5`: A tfmodisco-lite output H5 file of motif patterns. 
- `-o/--out-dir`: The path to the output directory. -- `-t/--cwm-trim-threshold`: The threshold to determine motif start and end positions within the full CWMs. Default is 0.3. +- `-t/--cwm-trim-threshold`: The threshold to determine motif start and end positions within the full CWMs. Default is 0.3. If you need finer control over motif trimming, check out the `-T/--cwm-trim-thresholds` and `-R/--cwm-trim-coords` options. - `-l/--global-lambda`: The L1 regularization weight determining the sparsity of hits. Default is 0.7. - `-b/--batch-size`: The batch size used for optimization. Default is 2000. - `-J/--compile`: Enable JIT compilation for faster execution. This option may not work on older GPUs. diff --git a/pyproject.toml b/pyproject.toml index 9e9d76a..e66a29a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "finemo" description = "Identification of regulatory elements from neural network contribution scores for DNA." keywords = ["deep learning", "genomics"] -version = "0.33" +version = "0.34" readme = "README.md" license = {file = "LICENSE"} authors = [ diff --git a/src/finemo/data_io.py b/src/finemo/data_io.py index 35e36e9..7091fa2 100644 --- a/src/finemo/data_io.py +++ b/src/finemo/data_io.py @@ -32,6 +32,17 @@ def load_mapping(path, type): return mapping +def load_mapping_tuple(path, type): + mapping = {} + with open(path) as f: + for line in f: + entries = line.rstrip("\n").split("\t") + key = entries[0] + val = entries[1:] + mapping[key] = tuple(type(i) for i in val) + + return mapping + NARROWPEAK_SCHEMA = ["chr", "peak_start", "peak_end", "peak_name", "peak_score", "peak_strand", "peak_signal", "peak_pval", "peak_qval", "peak_summit"] @@ -238,8 +249,8 @@ def _motif_name_sort_key(data): MODISCO_PATTERN_GROUPS = ['pos_patterns', 'neg_patterns'] -def load_modisco_motifs(modisco_h5_path, trim_threshold, motif_type, motifs_include, - motif_name_map, motif_lambdas, motif_lambda_default, 
include_rc): +def load_modisco_motifs(modisco_h5_path, trim_coords, trim_thresholds, trim_threshold_default, motif_type, + motifs_include, motif_name_map, motif_lambdas, motif_lambda_default, include_rc): """ Adapted from https://github.com/jmschrei/tfmodisco-lite/blob/570535ee5ccf43d670e898d92d63af43d68c38c5/modiscolite/report.py#L252-L272 """ @@ -257,6 +268,11 @@ def load_modisco_motifs(modisco_h5_path, trim_threshold, motif_type, motifs_incl if motif_lambdas is None: motif_lambdas = {} + if trim_coords is None: + trim_coords = {} + if trim_thresholds is None: + trim_thresholds = {} + if len(motif_name_map.values()) != len(set(motif_name_map.values())): raise ValueError("Specified motif names are not unique") @@ -281,8 +297,15 @@ def load_modisco_motifs(modisco_h5_path, trim_threshold, motif_type, motifs_incl cwm_fwd = cwm_raw / cwm_norm cwm_rev = cwm_fwd[::-1,::-1] - start_fwd, end_fwd = trim_motif(cwm_fwd, trim_threshold) - start_rev, end_rev = trim_motif(cwm_rev, trim_threshold) + + if pattern_tag in trim_coords: + start_fwd, end_fwd = trim_coords[pattern_tag] + else: + trim_threshold = trim_thresholds.get(pattern_tag, trim_threshold_default) + start_fwd, end_fwd = trim_motif(cwm_fwd, trim_threshold) + + cwm_len = cwm_fwd.shape[1] + start_rev, end_rev = cwm_len - end_fwd, cwm_len - start_fwd trim_mask_fwd = np.zeros(cwm_fwd.shape[1], dtype=np.int8) trim_mask_fwd[start_fwd:end_fwd] = 1 diff --git a/src/finemo/main.py b/src/finemo/main.py index 1fed513..4897974 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -54,8 +54,9 @@ def extract_regions_modisco_fmt(peaks_path, chrom_order_path, shaps_paths, ohe_p def call_hits(regions_path, peaks_path, modisco_h5_path, chrom_order_path, motifs_include_path, motif_names_path, - motif_lambdas_path, out_dir, cwm_trim_threshold, lambda_default, step_size_max, step_size_min, sqrt_transform, - convergence_tol, max_steps, batch_size, step_adjust, device, mode, no_post_filter, compile_optimizer): + motif_lambdas_path, 
out_dir, cwm_trim_coords_path, cwm_trim_thresholds_path, cwm_trim_threshold_default, + lambda_default, step_size_max, step_size_min, sqrt_transform, convergence_tol, max_steps, batch_size, + step_adjust, device, mode, no_post_filter, compile_optimizer): params = locals() from . import hitcaller @@ -107,9 +108,19 @@ def call_hits(regions_path, peaks_path, modisco_h5_path, chrom_order_path, motif motif_lambdas = data_io.load_mapping(motif_lambdas_path, float) else: motif_lambdas = None + + if cwm_trim_coords_path is not None: + trim_coords = data_io.load_mapping_tuple(cwm_trim_coords_path, int) + else: + trim_coords = None + + if cwm_trim_thresholds_path is not None: + trim_thresholds = data_io.load_mapping(cwm_trim_thresholds_path, float) + else: + trim_thresholds = None - motifs_df, cwms, trim_masks, motif_names = data_io.load_modisco_motifs(modisco_h5_path, cwm_trim_threshold, motif_type, motifs_include, - motif_name_map, motif_lambdas, lambda_default, True) + motifs_df, cwms, trim_masks, motif_names = data_io.load_modisco_motifs(modisco_h5_path, trim_coords, trim_thresholds, cwm_trim_threshold_default, + motif_type, motifs_include, motif_name_map, motif_lambdas, lambda_default, True) num_motifs = cwms.shape[0] motif_width = cwms.shape[2] lambdas = motifs_df.get_column("lambda").to_numpy(writable=True) @@ -173,8 +184,8 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p else: motif_name_map = None - motifs_df, cwms_modisco, trim_masks, motif_names = data_io.load_modisco_motifs(modisco_h5_path, cwm_trim_threshold, "cwm", - motifs_include, motif_name_map, None, None, True) + motifs_df, cwms_modisco, trim_masks, motif_names = data_io.load_modisco_motifs(modisco_h5_path, None, None, cwm_trim_threshold, "cwm", + motifs_include, motif_name_map, None, None, True) else: hits_df_path = os.path.join(hits_dir, "hits.tsv") @@ -188,7 +199,7 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p params_path = 
os.path.join(hits_dir, "parameters.json") params = data_io.load_params(params_path) - cwm_trim_threshold = params["cwm_trim_threshold"] + cwm_trim_threshold = params["cwm_trim_threshold_default"] if not use_seqlets: warnings.warn("Usage of the `--no-seqlets` flag is deprecated and will be removed in a future version. Please omit the `--modisco-h5` argument instead.") @@ -376,10 +387,14 @@ def cli(): help="The path to the output directory.") call_hits_parser.add_argument("-t", "--cwm-trim-threshold", type=float, default=0.3, - help="The threshold to determine motif start and end positions within the full CWMs.") + help="The default threshold to determine motif start and end positions within the full CWMs.") + call_hits_parser.add_argument("-T", "--cwm-trim-thresholds", type=str, default=None, + help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column and custom trim thresholds in the second column. Omitted motifs default to the `--cwm-trim-threshold` value.") + call_hits_parser.add_argument("-R", "--cwm-trim-coords", type=str, default=None, + help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column and custom trim start and end coordinates in the second and third columns, respectively. Omitted motifs default to `--cwm-trim-thresholds` values.") call_hits_parser.add_argument("-l", "--global-lambda", type=float, default=0.7, - help="The L1 regularization weight determining the sparsity of hits.") + help="The default L1 regularization weight determining the sparsity of hits.") call_hits_parser.add_argument("-L", "--motif-lambdas", type=str, default=None, help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column and motif-specific lambdas in the second column. 
Omitted motifs default to the `--global-lambda` value.") call_hits_parser.add_argument("-a", "--alpha", type=float, default=None, @@ -487,9 +502,9 @@ def cli(): args.motif_lambdas = args.motif_alphas call_hits(args.regions, args.peaks, args.modisco_h5, args.chrom_order, args.motifs_include, args.motif_names, - args.motif_lambdas, args.out_dir, args.cwm_trim_threshold, args.global_lambda, args.step_size_max, - args.step_size_min, args.sqrt_transform, args.convergence_tol, args.max_steps, args.batch_size, - args.step_adjust, args.device, args.mode, args.no_post_filter, args.compile) + args.motif_lambdas, args.out_dir, args.cwm_trim_coords, args.cwm_trim_thresholds, args.cwm_trim_threshold, + args.global_lambda, args.step_size_max, args.step_size_min, args.sqrt_transform, args.convergence_tol, + args.max_steps, args.batch_size, args.step_adjust, args.device, args.mode, args.no_post_filter, args.compile) elif args.cmd == "report": if args.no_recall and not args.no_seqlets: From 58ebbd8362bf45510dfe7044adf2daed543746c3 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Tue, 29 Apr 2025 05:59:07 -0700 Subject: [PATCH 08/39] Switch to fraction threshold in `collapse-hits` --- README.md | 2 +- pyproject.toml | 2 +- src/finemo/main.py | 10 +++++----- src/finemo/postprocessing.py | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index c71ded2..623b220 100644 --- a/README.md +++ b/README.md @@ -235,7 +235,7 @@ Usage: `usage: finemo collapse-hits -i -o [-O ]` - `-i/--hits`: The path to the input hits file. This should be the `hits.tsv` or `hits_unique.tsv` file generated by the `finemo call-hits` command. - `-o/--out-path`: The path to the output file. This will be a copy of the input file with an additional `is_primary` column. -- `-O/--overlap`: The minimum overlap (in base pairs) required for two hits to be considered overlapping. Default is 3 bp. 
+- `-O/--overlap-frac`: The minimum fraction overlap required for two hits to be considered overlapping. Precisely, given two hits of lengths `x` and `y`, the minimum number of overlapping bases is `overlap_frac * (x + y) / 2`. Default is 0.2. #### `finemo intersect-hits` diff --git a/pyproject.toml b/pyproject.toml index e66a29a..8c68141 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "finemo" description = "Identification of regulatory elements from neural network contribution scores for DNA." keywords = ["deep learning", "genomics"] -version = "0.34" +version = "0.35" readme = "README.md" license = {file = "LICENSE"} authors = [ diff --git a/src/finemo/main.py b/src/finemo/main.py index 4897974..50be9b2 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -245,11 +245,11 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p evaluation.write_report(report_df, motif_names, report_path, compute_recall, seqlets_df is not None) -def collapse_hits(hits_path, out_path, overlap): +def collapse_hits(hits_path, out_path, overlap_frac): from . 
import postprocessing hits_df = data_io.load_hits(hits_path, lazy=False) - hits_collapsed_df = postprocessing.collapse_hits(hits_df, overlap) + hits_collapsed_df = postprocessing.collapse_hits(hits_df, overlap_frac) data_io.write_hits_processed(hits_collapsed_df, out_path, schema=data_io.HITS_COLLAPSED_DTYPES) @@ -460,8 +460,8 @@ def cli(): help="The `hits.tsv` or `hits_unique.tsv` file from `call-hits`.") collapse_hits_parser.add_argument("-o", "--out-path", type=str, required=True, help="The path to the output .tsv file with an additional \"is_primary\" column.") - collapse_hits_parser.add_argument("-O", "--overlap", type=int, default=3, - help="The minimum number of base pairs to consider as overlapping.") + collapse_hits_parser.add_argument("-O", "--overlap-frac", type=float, default=0.2, + help="The threshold for determining overlapping hits. For two hits with lengths x and y, the minimum overlap is defined as `overlap_frac * (x + y) / 2`. The default value of 0.2 means that two hits must overlap by at least 20% of their average lengths to be considered overlapping.") intersect_hits_parser = subparsers.add_parser("intersect-hits", formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -515,7 +515,7 @@ def cli(): not args.no_recall, not args.no_seqlets) elif args.cmd == "collapse-hits": - collapse_hits(args.hits, args.out_path, args.overlap) + collapse_hits(args.hits, args.out_path, args.overlap_frac) elif args.cmd == "intersect-hits": intersect_hits(args.hits, args.out_path, args.relaxed) diff --git a/src/finemo/postprocessing.py b/src/finemo/postprocessing.py index 364f371..5663a4a 100644 --- a/src/finemo/postprocessing.py +++ b/src/finemo/postprocessing.py @@ -37,7 +37,7 @@ def _collapse_hits(chrom_ids, starts, ends, similarities): return out -def collapse_hits(hits_df, overlap): +def collapse_hits(hits_df, overlap_frac): chroms = hits_df["chr"].unique(maintain_order=True) if not chroms.is_empty(): @@ -46,15 +46,15 @@ def collapse_hits(hits_df, overlap): 
} df = hits_df.select( chrom_id=pl.col("chr").replace_strict(chrom_to_id, return_dtype=pl.UInt32), - start_trim=pl.col("start") * 2 + overlap, - end_trim=pl.col("end") * 2 - overlap, + start_trim=pl.col("start") * 2 + ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), + end_trim=pl.col("end") * 2 - ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), similarity=pl.col("hit_similarity") ) else: df = hits_df.select( chrom_id=pl.col("peak_id"), - start_trim=pl.col("start") * 2 + overlap, - end_trim=pl.col("end") * 2 - overlap, + start_trim=pl.col("start") * 2 + ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), + end_trim=pl.col("end") * 2 - ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), similarity=pl.col("hit_similarity") ) From 8a998154d32e427e92b436881f566f34f984a957 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Thu, 1 May 2025 06:16:10 -0700 Subject: [PATCH 09/39] Confusion matrix visualization --- pyproject.toml | 2 +- src/finemo/data_io.py | 4 ++ src/finemo/evaluation.py | 117 ++++++++++++++++++++++++++++++- src/finemo/main.py | 7 ++ src/finemo/templates/report.html | 12 ++++ 5 files changed, 140 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8c68141..e527bce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "finemo" description = "Identification of regulatory elements from neural network contribution scores for DNA." 
keywords = ["deep learning", "genomics"] -version = "0.35" +version = "0.36" readme = "README.md" license = {file = "LICENSE"} authors = [ diff --git a/src/finemo/data_io.py b/src/finemo/data_io.py index 7091fa2..1088d68 100644 --- a/src/finemo/data_io.py +++ b/src/finemo/data_io.py @@ -595,6 +595,10 @@ def write_occ_df(occ_df, out_path): occ_df.write_csv(out_path, separator="\t") +def write_seqlet_confusion_df(seqlet_confusion_df, out_path): + seqlet_confusion_df.write_csv(out_path, separator="\t") + + def write_report_data(report_df, cwms, out_dir): cwms_dir = os.path.join(out_dir, "CWMs") os.makedirs(cwms_dir, exist_ok=True) diff --git a/src/finemo/evaluation.py b/src/finemo/evaluation.py index 72358c5..ff85d73 100644 --- a/src/finemo/evaluation.py +++ b/src/finemo/evaluation.py @@ -160,7 +160,7 @@ def plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_dir): fig, ax = plt.subplots(figsize=(8, 8)) # Plot the heatmap - ax.imshow(matrix, interpolation="nearest", aspect="auto", cmap="Greens") + cax = ax.imshow(matrix, interpolation="nearest", aspect="equal", cmap="Greens") # Set axes on heatmap ax.set_yticks(np.arange(len(motif_keys))) @@ -170,6 +170,11 @@ def plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_dir): ax.set_xlabel("Motif i") ax.set_ylabel("Motif j") + ax.tick_params(axis='both', labelsize=8) + + cbar = fig.colorbar(cax, ax=ax, orientation="vertical", shrink=0.6, aspect=30) + cbar.ax.tick_params(labelsize=8) + output_path_png = os.path.join(output_dir, "motif_cooocurrence.png") plt.savefig(output_path_png, dpi=300) output_path_svg = os.path.join(output_dir, "motif_cooocurrence.svg") @@ -178,6 +183,35 @@ def plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_dir): plt.close() +def plot_seqlet_confusion_heatmap(seqlet_confusion, motif_names, output_dir): + motif_keys = [abbreviate_motif_name(m) for m in motif_names] + + fig, ax = plt.subplots(figsize=(8, 8)) + + # Plot the heatmap + cax = 
ax.imshow(seqlet_confusion, interpolation="nearest", aspect="equal", cmap="Blues") + + # Set axes on heatmap + ax.set_yticks(np.arange(len(motif_keys))) + ax.set_yticklabels(motif_keys) + ax.set_xticks(np.arange(len(motif_keys))) + ax.set_xticklabels(motif_keys, rotation=90) + ax.set_xlabel("Hit motif") + ax.set_ylabel("Seqlet motif") + + ax.tick_params(axis='both', labelsize=8) + + cbar = fig.colorbar(cax, ax=ax, orientation="vertical", shrink=0.6, aspect=30) + cbar.ax.tick_params(labelsize=8) + + output_path_png = os.path.join(output_dir, "seqlet_confusion.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(output_dir, "seqlet_confusion.svg") + plt.savefig(output_path_svg) + + plt.close() + + def get_cwms(regions, positions_df, motif_width): idx_df = ( positions_df @@ -366,6 +400,87 @@ def tfmodisco_comparison(regions, hits_df, peaks_df, seqlets_df, motifs_df, cwms return report_data, report_df, cwms, cwm_trim_bounds +def seqlet_confusion(hits_df, seqlets_df, peaks_df, motif_names, motif_width): + bin_size = motif_width - 1 + + hits_binned = ( + hits_df + .with_columns( + peak_id=pl.col('peak_id').cast(pl.UInt32), + is_revcomp=pl.col("strand") == '-' + ) + .join( + peaks_df.lazy(), on="peak_id", how="inner" + ) + .unique(subset=["chr_id", "start_untrimmed", "motif_name", "is_revcomp"]) + .select( + chr_id=pl.col("chr_id"), + start_bin=pl.col("start_untrimmed") // bin_size, + end_bin=pl.col("end_untrimmed") // bin_size, + motif_name=pl.col("motif_name") + ) + ) + + seqlets_binned = ( + seqlets_df + .select( + chr_id=pl.col("chr_id"), + start_bin=pl.col("start_untrimmed") // bin_size, + end_bin=pl.col("end_untrimmed") // bin_size, + motif_name=pl.col("motif_name") + ) + ) + + overlaps_df = ( + seqlets_binned.join( + hits_binned, + on=["chr_id", "start_bin", "end_bin"], + how="inner", + suffix="_hits" + ) + ) + + seqlet_counts = seqlets_binned.group_by("motif_name").len(name="num_seqlets") + overlap_counts = 
overlaps_df.group_by(["motif_name", "motif_name_hits"]).len(name="num_overlaps") + + num_motifs = len(motif_names) + confusion_mat = np.zeros((num_motifs, num_motifs), dtype=np.float32) + name_to_idx = {m: i for i, m in enumerate(motif_names)} + + confusion_df = ( + overlap_counts + .join( + seqlet_counts, + on="motif_name", + how="inner" + ) + .select( + motif_name_seqlets=pl.col("motif_name"), + motif_name_hits=pl.col("motif_name_hits"), + frac_overlap=pl.col("num_overlaps") / pl.col("num_seqlets"), + ) + .collect() + ) + + confusion_idx_df = ( + confusion_df + .select( + row_idx=pl.col("motif_name_seqlets").replace_strict(name_to_idx), + col_idx=pl.col("motif_name_hits").replace_strict(name_to_idx), + frac_overlap=pl.col("frac_overlap") + ) + ) + + row_idx = confusion_idx_df["row_idx"].to_numpy() + col_idx = confusion_idx_df["col_idx"].to_numpy() + frac_overlap = confusion_idx_df["frac_overlap"].to_numpy() + + confusion_mat[row_idx, col_idx] = frac_overlap + + return confusion_df, confusion_mat + + + class LogoGlyph(AbstractPathEffect): def __init__(self, glyph, ref_glyph='E', font_props=None, offset=(0., 0.), **kwargs): diff --git a/src/finemo/main.py b/src/finemo/main.py index 50be9b2..0f1101c 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -218,6 +218,9 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p cwms_modisco, motif_names, modisco_half_width, motif_width, compute_recall) + if seqlets_df is not None: + confusion_df, confusion_mat = evaluation.seqlet_confusion(hits_df, seqlets_df, peaks_df, motif_names, motif_width) + os.makedirs(out_dir, exist_ok=True) occ_path = os.path.join(out_dir, "motif_occurrences.tsv") @@ -239,7 +242,11 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p seqlets_path = os.path.join(out_dir, "seqlets.tsv") data_io.write_modisco_seqlets(seqlets_df, seqlets_path) + seqlet_confusion_path = os.path.join(out_dir, "seqlet_confusion.tsv") + 
data_io.write_seqlet_confusion_df(confusion_df, seqlet_confusion_path) + evaluation.plot_hit_vs_seqlet_counts(report_data, out_dir) + evaluation.plot_seqlet_confusion_heatmap(confusion_mat, motif_names, out_dir) report_path = os.path.join(out_dir, "report.html") evaluation.write_report(report_df, motif_names, report_path, compute_recall, seqlets_df is not None) diff --git a/src/finemo/templates/report.html b/src/finemo/templates/report.html index 0b2ed57..8465ead 100644 --- a/src/finemo/templates/report.html +++ b/src/finemo/templates/report.html @@ -323,6 +323,18 @@

Hit and seqlet motif comparisons

Motif NameHits Per PeakHits Per RegionHit CoefficientHit SimilarityHit Importance
{{ m }}
+{% if compute_recall %} + +

Seqlet-hit confusion matrix

+

+ This heatmap shows the prevalence of motifs whose (untrimmed) hits overlap with TF-MoDISco seqlets of other motifs. + The vertical axis shows the motif of the seqlet, while the horizontal axis shows the motif of the hit. + The color intensity here represents an estimator of the expected number of bases of hit overlap per base of seqlet. +

+ + +{% endif %} +

Hit statistic distributions

The following figures visualize the distribution of hit statistics across motifs and regions. From 2cb2bf2dc1b5735b29ce18e48339f28cbbee048d Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Thu, 1 May 2025 11:03:50 -0700 Subject: [PATCH 10/39] Adjust plot padding --- src/finemo/evaluation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/finemo/evaluation.py b/src/finemo/evaluation.py index ff85d73..5e44529 100644 --- a/src/finemo/evaluation.py +++ b/src/finemo/evaluation.py @@ -157,7 +157,7 @@ def plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_dir): matrix = peak_hit_counts * cov_norm[:,None] * cov_norm[None,:] motif_keys = [abbreviate_motif_name(m) for m in motif_names] - fig, ax = plt.subplots(figsize=(8, 8)) + fig, ax = plt.subplots(figsize=(8, 8), layout='constrained') # Plot the heatmap cax = ax.imshow(matrix, interpolation="nearest", aspect="equal", cmap="Greens") @@ -186,7 +186,7 @@ def plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_dir): def plot_seqlet_confusion_heatmap(seqlet_confusion, motif_names, output_dir): motif_keys = [abbreviate_motif_name(m) for m in motif_names] - fig, ax = plt.subplots(figsize=(8, 8)) + fig, ax = plt.subplots(figsize=(8, 8), layout='constrained') # Plot the heatmap cax = ax.imshow(seqlet_confusion, interpolation="nearest", aspect="equal", cmap="Blues") @@ -585,7 +585,7 @@ def plot_hit_vs_seqlet_counts(recall_data, output_dir): lim = max(np.amax(x), np.amax(y)) - fig, ax = plt.subplots(figsize=(8,8)) + fig, ax = plt.subplots(figsize=(8,8), layout='constrained') ax.axline((0, 0), (lim, lim), color="0.3", linewidth=0.7, linestyle=(0, (5, 5))) ax.scatter(x, y, s=5) for i, txt in enumerate(m): From 61182f7794e5fb387dad9cc06b8052ddf7ff49f9 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Thu, 1 May 2025 15:38:53 -0700 Subject: [PATCH 11/39] Adjust confusion matrix calculation --- src/finemo/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/src/finemo/evaluation.py b/src/finemo/evaluation.py index 5e44529..488cd48 100644 --- a/src/finemo/evaluation.py +++ b/src/finemo/evaluation.py @@ -401,7 +401,7 @@ def tfmodisco_comparison(regions, hits_df, peaks_df, seqlets_df, motifs_df, cwms def seqlet_confusion(hits_df, seqlets_df, peaks_df, motif_names, motif_width): - bin_size = motif_width - 1 + bin_size = motif_width hits_binned = ( hits_df From 1947e80e79d1fd88cce1ee55fa59f2f3b92442c0 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 5 May 2025 15:16:42 -0700 Subject: [PATCH 12/39] Refactor visualization and eval code --- src/finemo/evaluation.py | 315 ------------------------------------ src/finemo/main.py | 18 +-- src/finemo/visualization.py | 313 +++++++++++++++++++++++++++++++++++ 3 files changed, 321 insertions(+), 325 deletions(-) create mode 100644 src/finemo/visualization.py diff --git a/src/finemo/evaluation.py b/src/finemo/evaluation.py index 488cd48..98bb919 100644 --- a/src/finemo/evaluation.py +++ b/src/finemo/evaluation.py @@ -1,36 +1,7 @@ -import os import warnings -import importlib import numpy as np import polars as pl -import matplotlib.pyplot as plt -from matplotlib.patheffects import AbstractPathEffect -from matplotlib.textpath import TextPath -from matplotlib.transforms import Affine2D -from matplotlib.font_manager import FontProperties -from jinja2 import Template - -from . 
import templates - - -def abbreviate_motif_name(name): - try: - group, motif = name.split(".") - - if group == "pos_patterns": - group_short = "+" - elif group == "neg_patterns": - group_short = "-" - else: - raise Exception - - motif_num = motif.split("_")[1] - - return f"{group_short}/{motif_num}" - - except: - return name def get_motif_occurences(hits_df, motif_names): @@ -62,156 +33,6 @@ def get_motif_occurences(hits_df, motif_names): return occ_df, coocc -def plot_hit_stat_distributions(hits_df, motif_names, plot_dir): - hits_df = hits_df.collect() - hits_by_motif = hits_df.partition_by("motif_name", as_dict=True) - dummy_df = hits_df.clear() - - motifs_dir = os.path.join(plot_dir, "motif_stat_distributions") - os.makedirs(motifs_dir, exist_ok=True) - for m in motif_names: - hits = hits_by_motif.get((m,), dummy_df) - coefficients = hits.get_column("hit_coefficient_global").to_numpy() - similarities = hits.get_column("hit_similarity").to_numpy() - importances = hits.get_column("hit_importance").to_numpy() - - fig, ax = plt.subplots(figsize=(5, 2)) - - ax.hist(coefficients, bins=50, density=True) - - output_path_png = os.path.join(motifs_dir, f"{m}_coefficients.png") - plt.savefig(output_path_png, dpi=300) - output_path_svg = os.path.join(motifs_dir, f"{m}_coefficients.svg") - plt.savefig(output_path_svg) - plt.close(fig) - - fig, ax = plt.subplots(figsize=(5, 2)) - - ax.hist(similarities, bins=50, density=True) - - output_path_png = os.path.join(motifs_dir, f"{m}_similarities.png") - plt.savefig(output_path_png, dpi=300) - output_path_svg = os.path.join(motifs_dir, f"{m}_similarities.svg") - plt.savefig(output_path_svg) - plt.close(fig) - - fig, ax = plt.subplots(figsize=(5, 2)) - - ax.hist(importances, bins=50, density=True) - - output_path_png = os.path.join(motifs_dir, f"{m}_importances.png") - plt.savefig(output_path_png, dpi=300) - output_path_svg = os.path.join(motifs_dir, f"{m}_importances.svg") - plt.savefig(output_path_svg) - plt.close(fig) - - -def 
plot_hit_peak_distributions(occ_df, motif_names, plot_dir): - motifs_dir = os.path.join(plot_dir, "motif_hit_distributions") - os.makedirs(motifs_dir, exist_ok=True) - - for m in motif_names: - fig, ax = plt.subplots(figsize=(5, 2)) - - unique, counts = np.unique(occ_df.get_column(m), return_counts=True) - freq = counts / counts.sum() - num_bins = np.amax(unique, initial=0) + 1 - x = np.arange(num_bins) - y = np.zeros(num_bins) - y[unique] = freq - ax.bar(x, y) - - output_path_png = os.path.join(motifs_dir, f"{m}.png") - plt.savefig(output_path_png, dpi=300) - output_path_svg = os.path.join(motifs_dir, f"{m}.svg") - plt.savefig(output_path_svg) - - plt.close(fig) - - fig, ax = plt.subplots(figsize=(8, 4)) - - unique, counts = np.unique(occ_df.get_column("total"), return_counts=True) - freq = counts / counts.sum() - num_bins = np.amax(unique, initial=0) + 1 - x = np.arange(num_bins) - y = np.zeros(num_bins) - y[unique] = freq - ax.bar(x, y) - - ax.set_xlabel("Total hits per region") - ax.set_ylabel("Frequency") - - output_path_png = os.path.join(plot_dir, "total_hit_distribution.png") - plt.savefig(output_path_png, dpi=300) - output_path_svg = os.path.join(plot_dir, "total_hit_distribution.svg") - plt.savefig(output_path_svg, dpi=300) - - plt.close(fig) - - -def plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_dir): - """ - Plots a simple indicator heatmap of the motifs in each peak. 
- """ - cov_norm = 1 / np.sqrt(np.diag(peak_hit_counts)) - matrix = peak_hit_counts * cov_norm[:,None] * cov_norm[None,:] - motif_keys = [abbreviate_motif_name(m) for m in motif_names] - - fig, ax = plt.subplots(figsize=(8, 8), layout='constrained') - - # Plot the heatmap - cax = ax.imshow(matrix, interpolation="nearest", aspect="equal", cmap="Greens") - - # Set axes on heatmap - ax.set_yticks(np.arange(len(motif_keys))) - ax.set_yticklabels(motif_keys) - ax.set_xticks(np.arange(len(motif_keys))) - ax.set_xticklabels(motif_keys, rotation=90) - ax.set_xlabel("Motif i") - ax.set_ylabel("Motif j") - - ax.tick_params(axis='both', labelsize=8) - - cbar = fig.colorbar(cax, ax=ax, orientation="vertical", shrink=0.6, aspect=30) - cbar.ax.tick_params(labelsize=8) - - output_path_png = os.path.join(output_dir, "motif_cooocurrence.png") - plt.savefig(output_path_png, dpi=300) - output_path_svg = os.path.join(output_dir, "motif_cooocurrence.svg") - plt.savefig(output_path_svg) - - plt.close() - - -def plot_seqlet_confusion_heatmap(seqlet_confusion, motif_names, output_dir): - motif_keys = [abbreviate_motif_name(m) for m in motif_names] - - fig, ax = plt.subplots(figsize=(8, 8), layout='constrained') - - # Plot the heatmap - cax = ax.imshow(seqlet_confusion, interpolation="nearest", aspect="equal", cmap="Blues") - - # Set axes on heatmap - ax.set_yticks(np.arange(len(motif_keys))) - ax.set_yticklabels(motif_keys) - ax.set_xticks(np.arange(len(motif_keys))) - ax.set_xticklabels(motif_keys, rotation=90) - ax.set_xlabel("Hit motif") - ax.set_ylabel("Seqlet motif") - - ax.tick_params(axis='both', labelsize=8) - - cbar = fig.colorbar(cax, ax=ax, orientation="vertical", shrink=0.6, aspect=30) - cbar.ax.tick_params(labelsize=8) - - output_path_png = os.path.join(output_dir, "seqlet_confusion.png") - plt.savefig(output_path_png, dpi=300) - output_path_svg = os.path.join(output_dir, "seqlet_confusion.svg") - plt.savefig(output_path_svg) - - plt.close() - - def get_cwms(regions, 
positions_df, motif_width): idx_df = ( positions_df @@ -480,139 +301,3 @@ def seqlet_confusion(hits_df, seqlets_df, peaks_df, motif_names, motif_width): return confusion_df, confusion_mat - -class LogoGlyph(AbstractPathEffect): - def __init__(self, glyph, ref_glyph='E', font_props=None, - offset=(0., 0.), **kwargs): - - super().__init__(offset) - - path_orig = TextPath((0, 0), glyph, size=1, prop=font_props) - dims = path_orig.get_extents() - ref_dims = TextPath((0, 0), ref_glyph, size=1, prop=font_props).get_extents() - - h_scale = 1 / dims.height - ref_width = max(dims.width, ref_dims.width) - w_scale = 1 / ref_width - w_shift = (1 - dims.width / ref_width) / 2 - x_shift = -dims.x0 - y_shift = -dims.y0 - stretch = ( - Affine2D() - .translate(tx=x_shift, ty=y_shift) - .scale(sx=w_scale, sy=h_scale) - .translate(tx=w_shift, ty=0) - ) - - self.path = stretch.transform_path(path_orig) - - #: The dictionary of keywords to update the graphics collection with. - self._gc = kwargs - - def draw_path(self, renderer, gc, tpath, affine, rgbFace): - return renderer.draw_path(gc, self.path, affine, rgbFace) - - -def plot_logo(ax, heights, glyphs, colors=None, font_props=None, shade_bounds=None): - if colors is None: - colors = {g: None for g in glyphs} - - ax.margins(x=0, y=0) - - pos_values = np.clip(heights, 0, None) - neg_values = np.clip(heights, None, 0) - pos_order = np.argsort(pos_values, axis=0) - neg_order = np.argsort(neg_values, axis=0)[::-1,:] - pos_reorder = np.argsort(pos_order, axis=0) - neg_reorder = np.argsort(neg_order, axis=0) - pos_offsets = np.take_along_axis( - np.cumsum( - np.take_along_axis(pos_values, pos_order, axis=0), axis=0 - ), pos_reorder, axis=0 - ) - neg_offsets = np.take_along_axis( - np.cumsum( - np.take_along_axis(neg_values, neg_order, axis=0), axis=0 - ), neg_reorder, axis=0 - ) - bottoms = pos_offsets + neg_offsets - heights - - x = np.arange(heights.shape[1]) - - for glyph, height, bottom in zip(glyphs, heights, bottoms): - ax.bar(x, 
height, 0.95, bottom=bottom, - path_effects=[LogoGlyph(glyph, font_props=font_props)], color=colors[glyph]) - - if shade_bounds is not None: - start, end = shade_bounds - ax.axvspan(start - 0.5, end - 0.5, color='0.9', zorder=-1) - - ax.axhline(zorder=-1, linewidth=0.5, color='black') - - -LOGO_ALPHABET = 'ACGT' -LOGO_COLORS = {"A": '#109648', "C": '#255C99', "G": '#F7B32B', "T": '#D62839'} -LOGO_FONT = FontProperties(weight="bold") - -def plot_cwms(cwms, trim_bounds, out_dir, alphabet=LOGO_ALPHABET, colors=LOGO_COLORS, font=LOGO_FONT): - for m, v in cwms.items(): - motif_dir = os.path.join(out_dir, m) - os.makedirs(motif_dir, exist_ok=True) - for cwm_type, cwm in v.items(): - fig, ax = plt.subplots(figsize=(10,2)) - - plot_logo(ax, cwm, alphabet, colors=colors, font_props=font, shade_bounds=trim_bounds[m][cwm_type]) - - for name, spine in ax.spines.items(): - spine.set_visible(False) - - output_path_png = os.path.join(motif_dir, f"{cwm_type}.png") - plt.savefig(output_path_png, dpi=100) - output_path_svg = os.path.join(motif_dir, f"{cwm_type}.svg") - plt.savefig(output_path_svg) - - plt.close(fig) - - -def plot_hit_vs_seqlet_counts(recall_data, output_dir): - x = [] - y = [] - m = [] - for k, v in recall_data.items(): - x.append(v["num_hits_total"]) - y.append(v["num_seqlets"]) - m.append(k) - - lim = max(np.amax(x), np.amax(y)) - - fig, ax = plt.subplots(figsize=(8,8), layout='constrained') - ax.axline((0, 0), (lim, lim), color="0.3", linewidth=0.7, linestyle=(0, (5, 5))) - ax.scatter(x, y, s=5) - for i, txt in enumerate(m): - short = abbreviate_motif_name(txt) - ax.annotate(short, (x[i], y[i]), fontsize=8, weight="bold") - - ax.set_yscale('log') - ax.set_xscale('log') - - ax.set_xlabel("Hits per motif") - ax.set_ylabel("Seqlets per motif") - - output_path_png = os.path.join(output_dir, "hit_vs_seqlet_counts.png") - plt.savefig(output_path_png, dpi=300) - output_path_svg = os.path.join(output_dir, "hit_vs_seqlet_counts.svg") - plt.savefig(output_path_svg) - - 
plt.close() - - -def write_report(report_df, motif_names, out_path, compute_recall, use_seqlets): - template_str = importlib.resources.files(templates).joinpath('report.html').read_text() - template = Template(template_str) - report = template.render(report_data=report_df.iter_rows(named=True), - motif_names=motif_names, compute_recall=compute_recall, - use_seqlets=use_seqlets) - with open(out_path, "w") as f: - f.write(report) - - diff --git a/src/finemo/main.py b/src/finemo/main.py index 0f1101c..852147c 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -152,7 +152,7 @@ def call_hits(regions_path, peaks_path, modisco_h5_path, chrom_order_path, motif def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_path, motif_names_path, out_dir, modisco_region_width, cwm_trim_threshold, compute_recall, use_seqlets): - from . import evaluation + from . import evaluation, visualization sequences, contribs, peaks_df, _ = data_io.load_regions_npz(regions_path) if len(contribs.shape) == 3: @@ -228,14 +228,12 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p data_io.write_report_data(report_df, cwms, out_dir) - evaluation.plot_hit_stat_distributions(hits_df, motif_names, out_dir) - - evaluation.plot_hit_peak_distributions(occ_df, motif_names, out_dir) - - evaluation.plot_peak_motif_indicator_heatmap(coooc, motif_names, out_dir) + visualization.plot_hit_stat_distributions(hits_df, motif_names, out_dir) + visualization.plot_hit_peak_distributions(occ_df, motif_names, out_dir) + visualization.plot_peak_motif_indicator_heatmap(coooc, motif_names, out_dir) plot_dir = os.path.join(out_dir, "CWMs") - evaluation.plot_cwms(cwms, trim_bounds, plot_dir) + visualization.plot_cwms(cwms, trim_bounds, plot_dir) if seqlets_df is not None: seqlets_df = seqlets_df.collect() @@ -245,11 +243,11 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p seqlet_confusion_path = os.path.join(out_dir, 
"seqlet_confusion.tsv") data_io.write_seqlet_confusion_df(confusion_df, seqlet_confusion_path) - evaluation.plot_hit_vs_seqlet_counts(report_data, out_dir) - evaluation.plot_seqlet_confusion_heatmap(confusion_mat, motif_names, out_dir) + visualization.plot_hit_vs_seqlet_counts(report_data, out_dir) + visualization.plot_seqlet_confusion_heatmap(confusion_mat, motif_names, out_dir) report_path = os.path.join(out_dir, "report.html") - evaluation.write_report(report_df, motif_names, report_path, compute_recall, seqlets_df is not None) + visualization.write_report(report_df, motif_names, report_path, compute_recall, seqlets_df is not None) def collapse_hits(hits_path, out_path, overlap_frac): diff --git a/src/finemo/visualization.py b/src/finemo/visualization.py new file mode 100644 index 0000000..fd348a1 --- /dev/null +++ b/src/finemo/visualization.py @@ -0,0 +1,313 @@ +import os +import importlib + +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.patheffects import AbstractPathEffect +from matplotlib.textpath import TextPath +from matplotlib.transforms import Affine2D +from matplotlib.font_manager import FontProperties +from jinja2 import Template + +from . 
import templates + + +def abbreviate_motif_name(name): + try: + group, motif = name.split(".") + if group == "pos_patterns": + group_short = "+" + elif group == "neg_patterns": + group_short = "-" + else: + raise Exception + motif_num = motif.split("_")[1] + return f"{group_short}/{motif_num}" + except: + return name + + +def plot_hit_stat_distributions(hits_df, motif_names, plot_dir): + hits_df = hits_df.collect() + hits_by_motif = hits_df.partition_by("motif_name", as_dict=True) + dummy_df = hits_df.clear() + + motifs_dir = os.path.join(plot_dir, "motif_stat_distributions") + os.makedirs(motifs_dir, exist_ok=True) + for m in motif_names: + hits = hits_by_motif.get((m,), dummy_df) + coefficients = hits.get_column("hit_coefficient_global").to_numpy() + similarities = hits.get_column("hit_similarity").to_numpy() + importances = hits.get_column("hit_importance").to_numpy() + + fig, ax = plt.subplots(figsize=(5, 2)) + + ax.hist(coefficients, bins=50, density=True) + + output_path_png = os.path.join(motifs_dir, f"{m}_coefficients.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(motifs_dir, f"{m}_coefficients.svg") + plt.savefig(output_path_svg) + plt.close(fig) + + fig, ax = plt.subplots(figsize=(5, 2)) + + ax.hist(similarities, bins=50, density=True) + + output_path_png = os.path.join(motifs_dir, f"{m}_similarities.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(motifs_dir, f"{m}_similarities.svg") + plt.savefig(output_path_svg) + plt.close(fig) + + fig, ax = plt.subplots(figsize=(5, 2)) + + ax.hist(importances, bins=50, density=True) + + output_path_png = os.path.join(motifs_dir, f"{m}_importances.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(motifs_dir, f"{m}_importances.svg") + plt.savefig(output_path_svg) + plt.close(fig) + + +def plot_hit_peak_distributions(occ_df, motif_names, plot_dir): + motifs_dir = os.path.join(plot_dir, "motif_hit_distributions") + 
os.makedirs(motifs_dir, exist_ok=True) + + for m in motif_names: + fig, ax = plt.subplots(figsize=(5, 2)) + + unique, counts = np.unique(occ_df.get_column(m), return_counts=True) + freq = counts / counts.sum() + num_bins = np.amax(unique, initial=0) + 1 + x = np.arange(num_bins) + y = np.zeros(num_bins) + y[unique] = freq + ax.bar(x, y) + + output_path_png = os.path.join(motifs_dir, f"{m}.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(motifs_dir, f"{m}.svg") + plt.savefig(output_path_svg) + + plt.close(fig) + + fig, ax = plt.subplots(figsize=(8, 4)) + + unique, counts = np.unique(occ_df.get_column("total"), return_counts=True) + freq = counts / counts.sum() + num_bins = np.amax(unique, initial=0) + 1 + x = np.arange(num_bins) + y = np.zeros(num_bins) + y[unique] = freq + ax.bar(x, y) + + ax.set_xlabel("Total hits per region") + ax.set_ylabel("Frequency") + + output_path_png = os.path.join(plot_dir, "total_hit_distribution.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(plot_dir, "total_hit_distribution.svg") + plt.savefig(output_path_svg, dpi=300) + + plt.close(fig) + + +def plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_dir): + """ + Plots a simple indicator heatmap of the motifs in each peak. 
+ """ + cov_norm = 1 / np.sqrt(np.diag(peak_hit_counts)) + matrix = peak_hit_counts * cov_norm[:, None] * cov_norm[None, :] + motif_keys = [abbreviate_motif_name(m) for m in motif_names] + + fig, ax = plt.subplots(figsize=(8, 8), layout='constrained') + + # Plot the heatmap + cax = ax.imshow(matrix, interpolation="nearest", aspect="equal", cmap="Greens") + + # Set axes on heatmap + ax.set_yticks(np.arange(len(motif_keys))) + ax.set_yticklabels(motif_keys) + ax.set_xticks(np.arange(len(motif_keys))) + ax.set_xticklabels(motif_keys, rotation=90) + ax.set_xlabel("Motif i") + ax.set_ylabel("Motif j") + + ax.tick_params(axis='both', labelsize=8) + + cbar = fig.colorbar(cax, ax=ax, orientation="vertical", shrink=0.6, aspect=30) + cbar.ax.tick_params(labelsize=8) + + output_path_png = os.path.join(output_dir, "motif_cooocurrence.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(output_dir, "motif_cooocurrence.svg") + plt.savefig(output_path_svg) + + plt.close() + + +def plot_seqlet_confusion_heatmap(seqlet_confusion, motif_names, output_dir): + motif_keys = [abbreviate_motif_name(m) for m in motif_names] + + fig, ax = plt.subplots(figsize=(8, 8), layout='constrained') + + # Plot the heatmap + cax = ax.imshow(seqlet_confusion, interpolation="nearest", aspect="equal", cmap="Blues") + + # Set axes on heatmap + ax.set_yticks(np.arange(len(motif_keys))) + ax.set_yticklabels(motif_keys) + ax.set_xticks(np.arange(len(motif_keys))) + ax.set_xticklabels(motif_keys, rotation=90) + ax.set_xlabel("Hit motif") + ax.set_ylabel("Seqlet motif") + + ax.tick_params(axis='both', labelsize=8) + + cbar = fig.colorbar(cax, ax=ax, orientation="vertical", shrink=0.6, aspect=30) + cbar.ax.tick_params(labelsize=8) + + output_path_png = os.path.join(output_dir, "seqlet_confusion.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(output_dir, "seqlet_confusion.svg") + plt.savefig(output_path_svg) + + plt.close() + + +class 
LogoGlyph(AbstractPathEffect): + def __init__(self, glyph, ref_glyph='E', font_props=None, + offset=(0., 0.), **kwargs): + + super().__init__(offset) + + path_orig = TextPath((0, 0), glyph, size=1, prop=font_props) + dims = path_orig.get_extents() + ref_dims = TextPath((0, 0), ref_glyph, size=1, prop=font_props).get_extents() + + h_scale = 1 / dims.height + ref_width = max(dims.width, ref_dims.width) + w_scale = 1 / ref_width + w_shift = (1 - dims.width / ref_width) / 2 + x_shift = -dims.x0 + y_shift = -dims.y0 + stretch = ( + Affine2D() + .translate(tx=x_shift, ty=y_shift) + .scale(sx=w_scale, sy=h_scale) + .translate(tx=w_shift, ty=0) + ) + + self.path = stretch.transform_path(path_orig) + + #: The dictionary of keywords to update the graphics collection with. + self._gc = kwargs + + def draw_path(self, renderer, gc, tpath, affine, rgbFace): + return renderer.draw_path(gc, self.path, affine, rgbFace) + + +def plot_logo(ax, heights, glyphs, colors=None, font_props=None, shade_bounds=None): + if colors is None: + colors = {g: None for g in glyphs} + + ax.margins(x=0, y=0) + + pos_values = np.clip(heights, 0, None) + neg_values = np.clip(heights, None, 0) + pos_order = np.argsort(pos_values, axis=0) + neg_order = np.argsort(neg_values, axis=0)[::-1, :] + pos_reorder = np.argsort(pos_order, axis=0) + neg_reorder = np.argsort(neg_order, axis=0) + pos_offsets = np.take_along_axis( + np.cumsum( + np.take_along_axis(pos_values, pos_order, axis=0), axis=0 + ), pos_reorder, axis=0 + ) + neg_offsets = np.take_along_axis( + np.cumsum( + np.take_along_axis(neg_values, neg_order, axis=0), axis=0 + ), neg_reorder, axis=0 + ) + bottoms = pos_offsets + neg_offsets - heights + + x = np.arange(heights.shape[1]) + + for glyph, height, bottom in zip(glyphs, heights, bottoms): + ax.bar(x, height, 0.95, bottom=bottom, + path_effects=[LogoGlyph(glyph, font_props=font_props)], color=colors[glyph]) + + if shade_bounds is not None: + start, end = shade_bounds + ax.axvspan(start - 0.5, end 
- 0.5, color='0.9', zorder=-1) + + ax.axhline(zorder=-1, linewidth=0.5, color='black') + + +LOGO_ALPHABET = 'ACGT' +LOGO_COLORS = {"A": '#109648', "C": '#255C99', "G": '#F7B32B', "T": '#D62839'} +LOGO_FONT = FontProperties(weight="bold") + + +def plot_cwms(cwms, trim_bounds, out_dir, alphabet=LOGO_ALPHABET, colors=LOGO_COLORS, font=LOGO_FONT): + for m, v in cwms.items(): + motif_dir = os.path.join(out_dir, m) + os.makedirs(motif_dir, exist_ok=True) + for cwm_type, cwm in v.items(): + fig, ax = plt.subplots(figsize=(10, 2)) + + plot_logo(ax, cwm, alphabet, colors=colors, font_props=font, shade_bounds=trim_bounds[m][cwm_type]) + + for name, spine in ax.spines.items(): + spine.set_visible(False) + + output_path_png = os.path.join(motif_dir, f"{cwm_type}.png") + plt.savefig(output_path_png, dpi=100) + output_path_svg = os.path.join(motif_dir, f"{cwm_type}.svg") + plt.savefig(output_path_svg) + + plt.close(fig) + + +def plot_hit_vs_seqlet_counts(recall_data, output_dir): + x = [] + y = [] + m = [] + for k, v in recall_data.items(): + x.append(v["num_hits_total"]) + y.append(v["num_seqlets"]) + m.append(k) + + lim = max(np.amax(x), np.amax(y)) + + fig, ax = plt.subplots(figsize=(8, 8), layout='constrained') + ax.axline((0, 0), (lim, lim), color="0.3", linewidth=0.7, linestyle=(0, (5, 5))) + ax.scatter(x, y, s=5) + for i, txt in enumerate(m): + short = abbreviate_motif_name(txt) + ax.annotate(short, (x[i], y[i]), fontsize=8, weight="bold") + + ax.set_yscale('log') + ax.set_xscale('log') + + ax.set_xlabel("Hits per motif") + ax.set_ylabel("Seqlets per motif") + + output_path_png = os.path.join(output_dir, "hit_vs_seqlet_counts.png") + plt.savefig(output_path_png, dpi=300) + output_path_svg = os.path.join(output_dir, "hit_vs_seqlet_counts.svg") + plt.savefig(output_path_svg) + + plt.close() + + +def write_report(report_df, motif_names, out_path, compute_recall, use_seqlets): + template_str = importlib.resources.files(templates).joinpath('report.html').read_text() + template 
= Template(template_str) + report = template.render(report_data=report_df.iter_rows(named=True), + motif_names=motif_names, compute_recall=compute_recall, + use_seqlets=use_seqlets) + with open(out_path, "w") as f: + f.write(report) \ No newline at end of file From ff0282057e6fecdf5219c6a5927674e0d4bae2dc Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sat, 30 Aug 2025 07:38:39 -0700 Subject: [PATCH 13/39] Update gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f68e48a..5c3c2fa 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ __pycache__ /notebooks /notebooks/old -/scratch.txt \ No newline at end of file +/scratch.txt +/scratch \ No newline at end of file From 670e1fe188b7ad7bc29fb9ee35188960f12abed4 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 31 Aug 2025 13:34:41 -0700 Subject: [PATCH 14/39] Code documentation --- .gitignore | 4 +- README.md | 76 +- environment.yml | 3 +- pyproject.toml | 7 +- setup.py | 2 +- src/finemo/__init__.py | 63 ++ src/finemo/data_io.py | 1200 +++++++++++++++++++++---- src/finemo/evaluation.py | 527 ++++++++--- src/finemo/hitcaller.py | 786 +++++++++++++---- src/finemo/main.py | 1416 ++++++++++++++++++++++++------ src/finemo/postprocessing.py | 261 +++++- src/finemo/templates/report.html | 139 +-- src/finemo/visualization.py | 488 ++++++++-- 13 files changed, 4030 insertions(+), 942 deletions(-) diff --git a/.gitignore b/.gitignore index 5c3c2fa..78dbf8b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,6 @@ -.conda -.DS_Store *.egg-info __pycache__ +/.* /notebooks -/notebooks/old /scratch.txt /scratch \ No newline at end of file diff --git a/README.md b/README.md index 623b220..9231afd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,36 @@ -# finemo_gpu +# Fi-NeMo: Finding Neural network Motifs -**Fi-NeMo** (**Fi**nding **Ne**ural network **Mo**tifs) is a GPU-accelerated hit caller for identifying occurrences of TFMoDISCo motifs within 
contribution scores generated by machine learning models. +**Fi-NeMo** (**Fi**nding **Ne**ural network **Mo**tifs) is a GPU-accelerated motif instance calling tool for identifying transcription factor binding sites from neural network contribution scores. + +## Overview + +Fi-NeMo implements a competitive optimization approach using proximal gradient descent to identify motif instances by solving a sparse linear reconstruction problem. Unlike traditional sequence-based methods, Fi-NeMo leverages context-aware importance scores from deep neural networks to accurately map transcription factor binding sites, enabling the discovery of both high-confidence canonical motifs and low-prevalence cofactor motifs that are often missed by conventional approaches. + +The algorithm represents contribution scores as weighted combinations of motif contribution weight matrices (CWMs) at specific genomic positions. This competitive assignment process more closely reflects the biological reality of transcription factors competing for binding sites, resulting in superior sensitivity and specificity compared to sequence-only methods. + +### Features + +- **GPU-accelerated optimization**: Fast processing of large contribution score datasets using PyTorch +- **Competitive motif assignment**: Biologically-motivated algorithm that resolves similar motifs +- **Context-aware analysis**: Leverages neural network importance scores for improved sensitivity and specificity +- **Comprehensive evaluation**: Built-in tools for assessing and visualizing motif discovery quality and hit calling performance +- **Multiple input formats**: Support for bigWig, HDF5, and TF-MoDISCo output formats + +## Method + +Fi-NeMo solves motif instance calling as an optimization problem that reconstructs contribution score tracks as sparse linear combinations of motif CWMs. The algorithm formulates this as an L1-regularized linear model. 
+ +This competitive assignment encourages overlapping motif instances to be resolved in a meaningful way, with stronger matches receiving higher coefficients while weaker or redundant matches are suppressed. + +## References + +Fi-NeMo is described in: +> Tseng, Ramalingam, Wang, Schreiber, et al. "Decoding predictive motif lexicons and syntax from deep learning models of transcription factor binding profiles." (manuscript in preparation) + +Related tools: +- [TF-MoDISCo](https://github.com/jmschrei/tfmodisco-lite): *De novo* motif discovery from importance scores +- [BPNet](https://github.com/kundajelab/bpnet-refactor): Deep learning models for TF binding prediction +- [ChromBPNet](https://github.com/kundajelab/chrombpnet): Deep learning models for chromatin accessibility prediction ## Installation @@ -19,7 +49,7 @@ cd finemo_gpu #### Create a Conda Environment with Dependencies -This step is optional but recommended +This step is optional but recommended for conda users. ```sh conda env create -f environment.yml -n $ENV_NAME @@ -60,7 +90,13 @@ Recommended: ## Usage -Fi-NeMo includes a command-line utility named `finemo`. Here, we describe basic usage for each subcommand. For all options, run `finemo -h`. +Fi-NeMo provides a command-line utility named `finemo` for motif instance calling and analysis. The typical workflow involves three main steps: + +1. **Preprocessing**: Transform input contributions and sequences into a compressed format +2. **Hit Calling**: Identify motif instances using the Fi-NeMo algorithm +3. **Reporting and Analysis**: Generate visualizations and perform post-processing + +For detailed options for any subcommand, run `finemo -h`. ### Preprocessing @@ -126,7 +162,7 @@ Usage: `finemo extract-regions-modisco-fmt -s -a -o < #### `finemo call-hits` -Identify hits in input regions using TFMoDISCo CWM's. +Identify motif instances in input regions using the Fi-NeMo competitive optimization algorithm. 
This is the core functionality that leverages TF-MoDISCo CWMs to find motif occurrences in contribution score data. Usage: `finemo call-hits -r -m -o [-p ] [-t ] [-l ] [-b ] [-J]` @@ -195,13 +231,31 @@ Usage: `finemo call-hits -r -m -o [-p ] `params.json`: The parameters used for hit calling. -#### Additional notes +#### Parameter Guidelines + +**Sensitivity Control (`-l/--global-lambda`)** +- Controls sparsity and sensitivity of hit calling +- Higher values (e.g., 0.8-0.9) → fewer, higher-confidence hits +- Lower values (e.g., 0.5-0.6) → more sensitive, may include weaker hits +- Default of 0.7 works well for chromatin accessibility data +- ChIP-seq data may benefit from lower values (0.6) + +**Motif Trimming (`-t/--cwm-trim-threshold`)** +- Determines where motif boundaries are set within full CWMs +- Lower values → more conservative trimming, longer motifs +- Higher values → more aggressive trimming, shorter core motifs +- Affects resolution of closely-spaced motif instances + +**Performance Optimization (`-b/--batch-size`, `-J`)** +- Set batch size to utilize available GPU memory efficiently +- Reduce batch size if you encounter out-of-memory errors +- Enable JIT compilation (`-J`) for faster execution on newer GPUs + +#### Important Notes -- The `-l/--global-lambda` parameter controls the sensitivity of the hit-calling algorithm, with higher values resulting in fewer but more confident hits. This parameter represents the minimum cosine similarity between a query contribution score window and a CWM to be considered a hit. The default value of 0.7 typically works well for chromatin accessibility data. ChIP-Seq data may require a lower value (e.g. 0.6). -- The `-t/--cwm-trim-threshold` parameter sets the maximum relative contribution score in trimmed-out CWM flanks. If you find that motif flanks are being trimmed too aggressively, consider lowering this value. However, a too-low value may result in closely-spaced motif instances being missed. 
-- Set `-b/--batch-size` to fill a significant fraction of your GPU memory. **If you encounter GPU out-of-memory errors, try lowering this value.** -- Legacy TFMoDISCo H5 files can be updated to the newer TFMoDISCo-lite format with the `modisco convert` command found in the [tfmodisco-lite](https://github.com/jmschrei/tfmodisco-lite/tree/main) package. -- The hit-calling thresholding procedure is scale-invariant. That is, whether a position is assigned a hit depends on the shapes of the motif CWM and the contribution scores, not the absolute magnitude of the scores. If you wish to prioritize hits based on the magnitude of the contribution scores, set a per-motif rank threshold the `hit_coefficient_global` field in the `hits.tsv` file, which captures both the absolute importance and the closeness of match. +- **Scale Invariance**: Hit calling depends on motif and contribution score shapes, not absolute magnitudes. Use `hit_coefficient_global` or `hit_importance` for importance-based thresholding. +- **Competitive Assignment**: Overlapping motif candidates compete; only the best-fitting motif at each position receives a non-zero coefficient. +- **Legacy Format Support**: Convert older TF-MoDISCo files using `modisco convert` from [tfmodisco-lite](https://github.com/jmschrei/tfmodisco-lite). ### Output reporting and post-processing diff --git a/environment.yml b/environment.yml index 98e4cde..3720c11 100644 --- a/environment.yml +++ b/environment.yml @@ -18,4 +18,5 @@ dependencies: - tqdm=4.67.1 - jinja2=3.1.4 - pybigwig=0.3.23 - - pyfaidx=0.8.1.3 \ No newline at end of file + - pyfaidx=0.8.1.3 + - jaxtyping=0.3.2 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e527bce..747bb6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,12 +6,12 @@ build-backend = "setuptools.build_meta" name = "finemo" description = "Identification of regulatory elements from neural network contribution scores for DNA." 
keywords = ["deep learning", "genomics"] -version = "0.36" +version = "0.40" readme = "README.md" license = {file = "LICENSE"} authors = [ {name = "Austin Wang", email = "austin.wang1357@gmail.com"}, - {name = "Anshul Kundaje"} + {name = "Anshul Kundaje", email = "akundaje@stanford.edu"} ] dependencies = [ "numpy", @@ -34,3 +34,6 @@ finemo = "finemo.main:cli" [project.urls] Homepage = "https://github.com/austintwang/finemo_gpu" Repository = "https://github.com/austintwang/finemo_gpu.git" + +[tool.ruff] +ignore = ["F722"] \ No newline at end of file diff --git a/setup.py b/setup.py index 88c95dc..3e0f86b 100644 --- a/setup.py +++ b/setup.py @@ -3,4 +3,4 @@ # Empty setup.py for compatibility with pip<21.1 # See pyproject.toml for package configuration -setup() \ No newline at end of file +setup() diff --git a/src/finemo/__init__.py b/src/finemo/__init__.py index e69de29..b170d01 100644 --- a/src/finemo/__init__.py +++ b/src/finemo/__init__.py @@ -0,0 +1,63 @@ +"""Fi-NeMo: Finding Neural network Motifs. + +A GPU-accelerated motif instance calling tool for identifying transcription factor +binding sites from neural network contribution scores. + +Fi-NeMo implements a competitive optimization approach using proximal gradient descent +to identify motif instances by solving a sparse linear reconstruction problem. The +algorithm represents contribution scores as weighted combinations of motif contribution +weight matrices (CWMs) at specific genomic positions. 
+ +Key Features +------------ +- GPU-accelerated hit calling using PyTorch +- Support for multiple input formats (bigWig, HDF5, TF-MoDISCo) +- Competitive motif instance assignment +- Comprehensive evaluation and visualization tools +- Post-processing utilities for hit refinement + +Modules +------- +hitcaller : Core Fi-NeMo algorithm implementation +data_io : Data input/output utilities +main : Command-line interface +evaluation : Performance assessment tools +visualization : Plotting and report generation +postprocessing : Hit refinement and analysis + +Examples +-------- +Basic hit calling workflow: + +>>> import finemo +>>> from finemo import data_io, hitcaller +>>> +>>> # Load preprocessed data +>>> sequences, contribs, peaks_df, has_peaks = data_io.load_regions_npz('regions.npz') +>>> cwms, trim_masks = data_io.load_motif_cwms('motifs.h5') +>>> +>>> # Call hits +>>> hits_df, qc_df = hitcaller.fit_contribs( +... cwms=cwms, +... contribs=contribs, +... sequences=sequences, +... cwm_trim_mask=trim_masks, +... use_hypothetical=False, +... lambdas=np.array([0.7] * len(cwms)), +... step_size_max=3.0, +... step_size_min=0.08, +... sqrt_transform=False, +... convergence_tol=0.0005, +... max_steps=10000, +... batch_size=1000, +... step_adjust=0.7, +... post_filter=True, +... device=None, +... compile_optimizer=False +... ) + +See Also +-------- +TF-MoDISCo : https://github.com/jmschrei/tfmodisco-lite +BPNet : https://github.com/kundajelab/bpnet-refactor +""" diff --git a/src/finemo/data_io.py b/src/finemo/data_io.py index 1088d68..a604904 100644 --- a/src/finemo/data_io.py +++ b/src/finemo/data_io.py @@ -1,266 +1,846 @@ +"""Data input/output module for the Fi-NeMo motif instance calling pipeline. 
+ +This module handles loading and processing of various genomic data formats including: +- Peak region files (ENCODE NarrowPeak format) +- Genome sequences (FASTA format) +- Contribution scores (bigWig, HDF5 formats) +- Neural network model outputs +- Motif data from TF-MoDISCo +- Hit calling results + +The module supports multiple input formats used for contribution scores +and provides utilities for data conversion and quality control. +""" + import json import os import warnings from contextlib import ExitStack +from typing import List, Dict, Tuple, Optional, Any, Union, Callable import numpy as np +from numpy import ndarray import h5py -import hdf5plugin +import hdf5plugin # noqa: F401, imported for side effects (HDF5 plugin registration) import polars as pl import pyBigWig import pyfaidx +from jaxtyping import Float, Int from tqdm import tqdm -def load_txt(path): +def load_txt(path: str) -> List[str]: + """Load a text file containing one item per line. + + Parameters + ---------- + path : str + Path to the text file. + + Returns + ------- + List[str] + List of strings, one per line (first column if tab-delimited). + """ entries = [] with open(path) as f: for line in f: item = line.rstrip("\n").split("\t")[0] entries.append(item) - + return entries -def load_mapping(path, type): +def load_mapping(path: str, value_type: Callable[[str], Any]) -> Dict[str, Any]: + """Load a two-column tab-delimited mapping file. + + Parameters + ---------- + path : str + Path to the mapping file. Must be tab-delimited with exactly two columns. + value_type : Callable[[str], Any] + Type constructor to apply to values (e.g., int, float, str). + Must accept a string and return the converted value. + + Returns + ------- + Dict[str, Any] + Dictionary mapping keys to values of the specified type. + + Raises + ------ + ValueError + If lines don't contain exactly two tab-separated values. + FileNotFoundError + If the specified file does not exist. 
+ """ mapping = {} with open(path) as f: for line in f: key, val = line.rstrip("\n").split("\t") - mapping[key] = type(val) + mapping[key] = value_type(val) return mapping -def load_mapping_tuple(path, type): + +def load_mapping_tuple( + path: str, value_type: Callable[[str], Any] +) -> Dict[str, Tuple[Any, ...]]: + """Load a mapping file where values are tuples from multiple columns. + + Parameters + ---------- + path : str + Path to the mapping file. Must be tab-delimited with multiple columns. + value_type : Callable[[str], Any] + Type constructor to apply to each value element. + Must accept a string and return the converted value. + + Returns + ------- + Dict[str, Tuple[Any, ...]] + Dictionary mapping keys to tuples of values of the specified type. + The first column is used as the key, remaining columns as tuple values. + + Raises + ------ + ValueError + If lines don't contain at least two tab-separated values. + FileNotFoundError + If the specified file does not exist. + """ mapping = {} with open(path) as f: for line in f: entries = line.rstrip("\n").split("\t") key = entries[0] val = entries[1:] - mapping[key] = tuple(type(i) for i in val) + mapping[key] = tuple(value_type(i) for i in val) return mapping -NARROWPEAK_SCHEMA = ["chr", "peak_start", "peak_end", "peak_name", "peak_score", - "peak_strand", "peak_signal", "peak_pval", "peak_qval", "peak_summit"] -NARROWPEAK_DTYPES = [pl.String, pl.Int32, pl.Int32, pl.String, pl.UInt32, - pl.String, pl.Float32, pl.Float32, pl.Float32, pl.Int32] - -def load_peaks(peaks_path, chrom_order_path, half_width): +# ENCODE NarrowPeak format column definitions +NARROWPEAK_SCHEMA: List[str] = [ + "chr", + "peak_start", + "peak_end", + "peak_name", + "peak_score", + "peak_strand", + "peak_signal", + "peak_pval", + "peak_qval", + "peak_summit", +] +NARROWPEAK_DTYPES: List[Any] = [ + pl.String, + pl.Int32, + pl.Int32, + pl.String, + pl.UInt32, + pl.String, + pl.Float32, + pl.Float32, + pl.Float32, + pl.Int32, +] + + +def 
load_peaks( + peaks_path: str, chrom_order_path: Optional[str], half_width: int +) -> pl.DataFrame: + """Load peak region data from ENCODE NarrowPeak format file. + + Parameters + ---------- + peaks_path : str + Path to the NarrowPeak format file. + chrom_order_path : str, optional + Path to file defining chromosome ordering. If None, uses order from peaks file. + half_width : int + Half-width of regions around peak summits. + + Returns + ------- + pl.DataFrame + DataFrame containing peak information with columns: + - chr: Chromosome name + - peak_region_start: Start coordinate of centered region + - peak_name: Peak identifier + - peak_id: Sequential peak index + - chr_id: Numeric chromosome identifier + """ peaks = ( - pl.scan_csv(peaks_path, has_header=False, new_columns=NARROWPEAK_SCHEMA, separator='\t', - quote_char=None, schema_overrides=NARROWPEAK_DTYPES, null_values=['.', 'NA', 'null', 'NaN']) + pl.scan_csv( + peaks_path, + has_header=False, + new_columns=NARROWPEAK_SCHEMA, + separator="\t", + quote_char=None, + schema_overrides=NARROWPEAK_DTYPES, + null_values=[".", "NA", "null", "NaN"], + ) .select( chr=pl.col("chr"), peak_region_start=pl.col("peak_start") + pl.col("peak_summit") - half_width, - peak_name=pl.col("peak_name") + peak_name=pl.col("peak_name"), ) .with_row_index(name="peak_id") .collect() ) - + if chrom_order_path is not None: chrom_order = load_txt(chrom_order_path) else: chrom_order = [] chrom_order_set = set(chrom_order) - chrom_order_peaks = [i for i in peaks.get_column("chr").unique(maintain_order=True) if i not in chrom_order_set] + chrom_order_peaks = [ + i + for i in peaks.get_column("chr").unique(maintain_order=True) + if i not in chrom_order_set + ] chrom_order.extend(chrom_order_peaks) chrom_ind_map = {val: ind for ind, val in enumerate(chrom_order)} peaks = peaks.with_columns( pl.col("chr").replace_strict(chrom_ind_map).alias("chr_id") ) - + return peaks -SEQ_ALPHABET = np.array(["A","C","G","T"], dtype="S1") +# DNA sequence 
alphabet for one-hot encoding +SEQ_ALPHABET: np.ndarray = np.array(["A", "C", "G", "T"], dtype="S1") + + +def one_hot_encode(sequence: str, dtype: Any = np.int8) -> Int[ndarray, "4 L"]: + """Convert DNA sequence string to one-hot encoded matrix. -def one_hot_encode(sequence, dtype=np.int8): + Parameters + ---------- + sequence : str + DNA sequence string containing A, C, G, T characters. + dtype : np.dtype, default np.int8 + Data type for the output array. + + Returns + ------- + Int[ndarray, "4 L"] + One-hot encoded sequence where rows correspond to A, C, G, T and + L is the sequence length. + + Notes + ----- + The output array has shape (4, len(sequence)) with rows corresponding to + nucleotides A, C, G, T in that order. Non-standard nucleotides (N, etc.) + result in all-zero columns. + """ sequence = sequence.upper() - seq_chararray = np.frombuffer(sequence.encode('UTF-8'), dtype='S1') - one_hot = (seq_chararray[None,:] == SEQ_ALPHABET[:,None]).astype(dtype) + seq_chararray = np.frombuffer(sequence.encode("UTF-8"), dtype="S1") + one_hot = (seq_chararray[None, :] == SEQ_ALPHABET[:, None]).astype(dtype) return one_hot -def load_regions_from_bw(peaks, fa_path, bw_paths, half_width): +def load_regions_from_bw( + peaks: pl.DataFrame, fa_path: str, bw_paths: List[str], half_width: int +) -> Tuple[Int[ndarray, "N 4 L"], Float[ndarray, "N L"]]: + """Load genomic sequences and contribution scores from FASTA and bigWig files. + + Parameters + ---------- + peaks : pl.DataFrame + Peak regions DataFrame from load_peaks() containing columns: + 'chr', 'peak_region_start'. + fa_path : str + Path to genome FASTA file (.fa or .fasta format). + bw_paths : List[str] + List of paths to bigWig files containing contribution scores. + Must be non-empty. + half_width : int + Half-width of regions to extract around peak centers. + Total region width will be 2 * half_width. 
+ + Returns + ------- + sequences : Int[ndarray, "N 4 L"] + One-hot encoded DNA sequences where N is the number of peaks, + 4 represents A,C,G,T nucleotides, and L is the region length (2 * half_width). + contribs : Float[ndarray, "N L"] + Contribution scores averaged across input bigWig files. + Shape is (N peaks, L region_length). + + Notes + ----- + BigWig files only provide projected contribution scores, not hypothetical scores. + Regions extending beyond chromosome boundaries are zero-padded. + Missing values in bigWig files are converted to zero. + """ num_peaks = peaks.height + region_width = half_width * 2 - sequences = np.zeros((num_peaks, 4, half_width * 2), dtype=np.int8) - contribs = np.zeros((num_peaks, half_width * 2), dtype=np.float16) + sequences = np.zeros((num_peaks, 4, region_width), dtype=np.int8) + contribs = np.zeros((num_peaks, region_width), dtype=np.float16) + # Load genome reference genome = pyfaidx.Fasta(fa_path, one_based_attributes=False) - + bws = [pyBigWig.open(i) for i in bw_paths] contrib_buffer = np.zeros((len(bw_paths), half_width * 2), dtype=np.float16) try: - for ind, row in tqdm(enumerate(peaks.iter_rows(named=True)), disable=None, unit="regions", total=num_peaks): + for ind, row in tqdm( + enumerate(peaks.iter_rows(named=True)), + disable=None, + unit="regions", + total=num_peaks, + ): chrom = row["chr"] start = row["peak_region_start"] end = start + 2 * half_width - - sequence_data = genome[chrom][start:end] - sequence = sequence_data.seq - start_adj = sequence_data.start - end_adj = sequence_data.end + + sequence_data: pyfaidx.FastaRecord = genome[chrom][start:end] # type: ignore + sequence: str = sequence_data.seq # type: ignore + start_adj: int = sequence_data.start # type: ignore + end_adj: int = sequence_data.end # type: ignore a = start_adj - start b = end_adj - start if b > a: - sequences[ind,:,a:b] = one_hot_encode(sequence) + sequences[ind, :, a:b] = one_hot_encode(sequence) for j, bw in enumerate(bws): - 
contrib_buffer[j,:] = np.nan_to_num(bw.values(chrom, start_adj, end_adj, numpy=True)) + contrib_buffer[j, :] = np.nan_to_num( + bw.values(chrom, start_adj, end_adj, numpy=True) + ) + + contribs[ind, a:b] = np.mean(contrib_buffer, axis=0) - contribs[ind,a:b] = np.mean(contrib_buffer, axis=0) - finally: for bw in bws: bw.close() - + return sequences, contribs -def load_regions_from_chrombpnet_h5(h5_paths, half_width): +def load_regions_from_chrombpnet_h5( + h5_paths: List[str], half_width: int +) -> Tuple[Int[ndarray, "N 4 L"], Float[ndarray, "N 4 L"]]: + """Load genomic sequences and contribution scores from ChromBPNet HDF5 files. + + Parameters + ---------- + h5_paths : List[str] + List of paths to ChromBPNet HDF5 files containing sequences and SHAP scores. + Must be non-empty and contain compatible data shapes. + half_width : int + Half-width of regions to extract around the center. + Total region width will be 2 * half_width. + + Returns + ------- + sequences : Int[ndarray, "N 4 L"] + One-hot encoded DNA sequences where N is the number of regions, + 4 represents A,C,G,T nucleotides, and L is the region length (2 * half_width). + contribs : Float[ndarray, "N 4 L"] + SHAP contribution scores averaged across input files. + Shape is (N regions, 4 nucleotides, L region_length). + + Notes + ----- + ChromBPNet files store sequences in 'raw/seq' and SHAP scores in 'shap/seq'. + All input files must have the same dimensions and number of regions. + Missing values in contribution scores are converted to zero. 
+ """ with ExitStack() as stack: h5s = [stack.enter_context(h5py.File(i)) for i in h5_paths] - start = h5s[0]['raw/seq'].shape[-1] // 2 - half_width + start = h5s[0]["raw/seq"].shape[-1] // 2 - half_width # type: ignore # HDF5 array access end = start + 2 * half_width - - sequences = h5s[0]['raw/seq'][:,:,start:end].astype(np.int8) - contribs = np.mean([np.nan_to_num(f['shap/seq'][:,:,start:end]) for f in h5s], axis=0, dtype=np.float16) - - return sequences, contribs + sequences = h5s[0]["raw/seq"][:, :, start:end].astype(np.int8) # type: ignore # HDF5 array access + contribs = np.mean( + [np.nan_to_num(f["shap/seq"][:, :, start:end]) for f in h5s], # type: ignore # HDF5 array access + axis=0, + dtype=np.float16, + ) -def load_regions_from_bpnet_h5(h5_paths, half_width): + return sequences, contribs # type: ignore # HDF5 arrays converted to NumPy + + +def load_regions_from_bpnet_h5( + h5_paths: List[str], half_width: int +) -> Tuple[Int[ndarray, "N 4 L"], Float[ndarray, "N 4 L"]]: + """Load genomic sequences and contribution scores from BPNet HDF5 files. + + Parameters + ---------- + h5_paths : List[str] + List of paths to BPNet HDF5 files containing sequences and contribution scores. + Must be non-empty and contain compatible data shapes. + half_width : int + Half-width of regions to extract around the center. + Total region width will be 2 * half_width. + + Returns + ------- + sequences : Int[ndarray, "N 4 L"] + One-hot encoded DNA sequences where N is the number of regions, + 4 represents A,C,G,T nucleotides, and L is the region length (2 * half_width). + contribs : Float[ndarray, "N 4 L"] + Hypothetical contribution scores averaged across input files. + Shape is (N regions, 4 nucleotides, L region_length). + + Notes + ----- + BPNet files store sequences in 'input_seqs' and hypothetical scores in 'hyp_scores'. + The data requires axis swapping to convert from (n, length, 4) to (n, 4, length) format. 
+ All input files must have the same dimensions and number of regions. + Missing values in contribution scores are converted to zero. + """ with ExitStack() as stack: h5s = [stack.enter_context(h5py.File(i)) for i in h5_paths] - start = h5s[0]['input_seqs'].shape[-2] // 2 - half_width + start = h5s[0]["input_seqs"].shape[-2] // 2 - half_width # type: ignore # HDF5 array access end = start + 2 * half_width - sequences = h5s[0]['input_seqs'][:,start:end,:].swapaxes(1,2).astype(np.int8) - contribs = np.mean([np.nan_to_num(f['hyp_scores'][:,start:end,:].swapaxes(1,2)) for f in h5s], axis=0, dtype=np.float16) + sequences = h5s[0]["input_seqs"][:, start:end, :].swapaxes(1, 2).astype(np.int8) # type: ignore # HDF5 array access with axis swap + contribs = np.mean( + [ + np.nan_to_num(f["hyp_scores"][:, start:end, :].swapaxes(1, 2)) # type: ignore # HDF5 array access + for f in h5s + ], + axis=0, + dtype=np.float16, + ) return sequences, contribs -def load_npy_or_npz(path): +def load_npy_or_npz(path: str) -> ndarray: + """Load array data from .npy or .npz file. + + Parameters + ---------- + path : str + Path to .npy or .npz file. File must exist and contain valid NumPy data. + + Returns + ------- + ndarray + Loaded array data. For .npz files, returns the first array ('arr_0'). + For .npy files, returns the array directly. + + Raises + ------ + FileNotFoundError + If the specified file does not exist. + KeyError + If .npz file does not contain 'arr_0' key. + """ f = np.load(path) if isinstance(f, np.ndarray): arr = f else: - arr = f['arr_0'] + arr = f["arr_0"] return arr -def load_regions_from_modisco_fmt(shaps_paths, ohe_path, half_width): + +def load_regions_from_modisco_fmt( + shaps_paths: List[str], ohe_path: str, half_width: int +) -> Tuple[Int[ndarray, "N 4 L"], Float[ndarray, "N 4 L"]]: + """Load genomic sequences and contribution scores from TF-MoDISCo format files. 
+ + Parameters + ---------- + shaps_paths : List[str] + List of paths to .npy/.npz files containing SHAP/attribution scores. + Must be non-empty and all files must have compatible shapes. + ohe_path : str + Path to .npy/.npz file containing one-hot encoded sequences. + Must have shape (n_regions, 4, sequence_length). + half_width : int + Half-width of regions to extract around the center. + Total region width will be 2 * half_width. + + Returns + ------- + sequences : Int[ndarray, "N 4 L"] + One-hot encoded DNA sequences where N is the number of regions, + 4 represents A,C,G,T nucleotides, and L is the region length (2 * half_width). + contribs : Float[ndarray, "N 4 L"] + SHAP contribution scores averaged across input files. + Shape is (N regions, 4 nucleotides, L region_length). + + Notes + ----- + All SHAP files must have the same shape as the sequence file. + Missing values in contribution scores are converted to zero. + The center of the input sequences is used as the reference point for extraction. + """ sequences_raw = load_npy_or_npz(ohe_path) start = sequences_raw.shape[-1] // 2 - half_width end = start + 2 * half_width - sequences = sequences_raw[:,:,start:end].astype(np.int8) + sequences = sequences_raw[:, :, start:end].astype(np.int8) - shaps = [np.nan_to_num(load_npy_or_npz(p)[:,:,start:end]) for p in shaps_paths] + shaps = [np.nan_to_num(load_npy_or_npz(p)[:, :, start:end]) for p in shaps_paths] contribs = np.mean(shaps, axis=0, dtype=np.float16) return sequences, contribs -def load_regions_npz(npz_path): +def load_regions_npz( + npz_path: str, +) -> Tuple[ + Int[ndarray, "N 4 L"], + Union[Float[ndarray, "N 4 L"], Float[ndarray, "N L"]], + pl.DataFrame, + bool, +]: + """Load preprocessed genomic regions from NPZ file. + + Parameters + ---------- + npz_path : str + Path to NPZ file containing sequences, contributions, and optional coordinates. + Must contain 'sequences' and 'contributions' arrays at minimum. 
+ + Returns + ------- + sequences : Int[ndarray, "N 4 L"] + One-hot encoded DNA sequences where N is the number of regions, + 4 represents A,C,G,T nucleotides, and L is the region length. + contributions : Union[Float[ndarray, "N 4 L"], Float[ndarray, "N L"]] + Contribution scores in either hypothetical format (N, 4, L) or + projected format (N, L). Shape depends on the input data format. + peaks_df : pl.DataFrame + DataFrame containing peak region information with columns: + 'chr', 'chr_id', 'peak_region_start', 'peak_id', 'peak_name'. + has_peaks : bool + Whether the file contains genomic coordinate information. + If False, placeholder coordinate data is used. + + Notes + ----- + If genomic coordinates are not present in the NPZ file, creates placeholder + coordinate data and issues a warning. The placeholder data uses 'NA' for + chromosome names and sequential indices for peak IDs. + + Raises + ------ + KeyError + If required arrays 'sequences' or 'contributions' are missing from the file. + """ data = np.load(npz_path) - + if "chr" not in data.keys(): - warnings.warn("No genome coordinates present in the input .npz file. Returning sequences and contributions only.") + warnings.warn( + "No genome coordinates present in the input .npz file. Returning sequences and contributions only." 
+ ) has_peaks = False num_regions = data["sequences"].shape[0] - peak_data = {"chr": np.array(["NA"] * num_regions, dtype='U'), "chr_id": np.arange(num_regions, dtype=np.uint32), - "peak_region_start": np.zeros(num_regions, dtype=np.int32), "peak_id": np.arange(num_regions, dtype=np.uint32), - "peak_name": np.array(["NA"] * num_regions, dtype='U')} + peak_data = { + "chr": np.array(["NA"] * num_regions, dtype="U"), + "chr_id": np.arange(num_regions, dtype=np.uint32), + "peak_region_start": np.zeros(num_regions, dtype=np.int32), + "peak_id": np.arange(num_regions, dtype=np.uint32), + "peak_name": np.array(["NA"] * num_regions, dtype="U"), + } else: has_peaks = True - peak_data = {"chr": data["chr"], "chr_id": data["chr_id"], "peak_region_start": data["start"], - "peak_id": data["peak_id"], "peak_name": data["peak_name"]} - + peak_data = { + "chr": data["chr"], + "chr_id": data["chr_id"], + "peak_region_start": data["start"], + "peak_id": data["peak_id"], + "peak_name": data["peak_name"], + } + peaks_df = pl.DataFrame(peak_data) return data["sequences"], data["contributions"], peaks_df, has_peaks -def write_regions_npz(sequences, contributions, out_path, peaks_df=None): +def write_regions_npz( + sequences: Int[ndarray, "N 4 L"], + contributions: Union[Float[ndarray, "N 4 L"], Float[ndarray, "N L"]], + out_path: str, + peaks_df: Optional[pl.DataFrame] = None, +) -> None: + """Write genomic regions and contribution scores to compressed NPZ file. + + Parameters + ---------- + sequences : Int[ndarray, "N 4 L"] + One-hot encoded DNA sequences where N is the number of regions, + 4 represents A,C,G,T nucleotides, and L is the region length. + contributions : Union[Float[ndarray, "N 4 L"], Float[ndarray, "N L"]] + Contribution scores in either hypothetical format (N, 4, L) or + projected format (N, L). + out_path : str + Output path for the NPZ file. Parent directory must exist. 
+ peaks_df : Optional[pl.DataFrame] + DataFrame containing peak region information with columns: + 'chr', 'chr_id', 'peak_region_start', 'peak_id', 'peak_name'. + If None, only sequences and contributions are saved. + + Raises + ------ + ValueError + If the number of regions in sequences/contributions doesn't match peaks_df. + FileNotFoundError + If the parent directory of out_path does not exist. + + Notes + ----- + The output file is compressed using NumPy's savez_compressed format. + If peaks_df is provided, genomic coordinate information is included + in the output file for downstream analysis. + """ if peaks_df is None: - warnings.warn("No genome coordinates provided. Writing sequences and contributions only.") + warnings.warn( + "No genome coordinates provided. Writing sequences and contributions only." + ) np.savez_compressed(out_path, sequences=sequences, contributions=contributions) else: num_regions = peaks_df.height - if (num_regions != sequences.shape[0]) or (num_regions != contributions.shape[0]): - raise ValueError(f"Input sequences of shape {sequences.shape} and/or " - f"input contributions of shape {contributions.shape} " - f"are not compatible with peak region count of {num_regions}" ) - - chr_arr = peaks_df.get_column("chr").to_numpy().astype('U') + if (num_regions != sequences.shape[0]) or ( + num_regions != contributions.shape[0] + ): + raise ValueError( + f"Input sequences of shape {sequences.shape} and/or " + f"input contributions of shape {contributions.shape} " + f"are not compatible with peak region count of {num_regions}" + ) + + chr_arr = peaks_df.get_column("chr").to_numpy().astype("U") chr_id_arr = peaks_df.get_column("chr_id").to_numpy() start_arr = peaks_df.get_column("peak_region_start").to_numpy() peak_id_arr = peaks_df.get_column("peak_id").to_numpy() - peak_name_arr = peaks_df.get_column("peak_name").to_numpy().astype('U') - np.savez_compressed(out_path, sequences=sequences, contributions=contributions, - chr=chr_arr, 
chr_id=chr_id_arr, start=start_arr, peak_id=peak_id_arr, peak_name=peak_name_arr) + peak_name_arr = peaks_df.get_column("peak_name").to_numpy().astype("U") + np.savez_compressed( + out_path, + sequences=sequences, + contributions=contributions, + chr=chr_arr, + chr_id=chr_id_arr, + start=start_arr, + peak_id=peak_id_arr, + peak_name=peak_name_arr, + ) -def trim_motif(cwm, trim_threshold): - """ +def trim_motif(cwm: Float[ndarray, "4 W"], trim_threshold: float) -> Tuple[int, int]: + """Determine trimmed start and end positions for a motif based on contribution magnitude. + + This function identifies the core region of a motif by finding positions where + the total absolute contribution exceeds a threshold relative to the maximum. + + Parameters + ---------- + cwm : Float[ndarray, "4 W"] + Contribution weight matrix for the motif where 4 represents A,C,G,T + nucleotides and W is the motif width. + trim_threshold : float + Fraction of maximum score to use as trimming threshold (0.0 to 1.0). + Higher values result in more aggressive trimming. + + Returns + ------- + start : int + Start position of the trimmed motif (inclusive). + end : int + End position of the trimmed motif (exclusive). + + Notes + ----- + The trimming is based on the sum of absolute contributions across all nucleotides + at each position. Positions with contributions below trim_threshold * max_score + are removed from the motif edges. 
+    Adapted from https://github.com/jmschrei/tfmodisco-lite/blob/570535ee5ccf43d670e898d92d63af43d68c38c5/modiscolite/report.py#L213-L236
     """
     score = np.sum(np.abs(cwm), axis=0)
     trim_thresh = np.max(score) * trim_threshold
     pass_inds = np.nonzero(score >= trim_thresh)
 
-    start = max(np.min(pass_inds), 0)
-    end = min(np.max(pass_inds) + 1, len(score))
+    start = max(int(np.min(pass_inds)), 0)  # type: ignore # nonzero returns tuple of arrays
+    end = min(int(np.max(pass_inds)) + 1, len(score))  # type: ignore # nonzero returns tuple of arrays
 
     return start, end
 
 
-def softmax(x, temp=100):
+def softmax(x: Float[ndarray, "4 W"], temp: float = 100) -> Float[ndarray, "4 W"]:
+    """Apply softmax transformation with temperature scaling.
+
+    Parameters
+    ----------
+    x : Float[ndarray, "4 W"]
+        Input array to transform where 4 represents A,C,G,T nucleotides
+        and W is the motif width.
+    temp : float, default 100
+        Temperature parameter for softmax scaling. Higher values create
+        sharper probability distributions.
+
+    Returns
+    -------
+    Float[ndarray, "4 W"]
+        Softmax-transformed array with same shape as input. Each column
+        sums to 1.0, representing nucleotide probabilities at each position.
+
+    Notes
+    -----
+    The softmax is applied along the nucleotide axis (axis=0), normalizing
+    each position to have probabilities that sum to 1. The temperature
+    parameter controls the sharpness of the distribution.
+    """
     norm_x = x - np.mean(x, axis=1, keepdims=True)
     exp = np.exp(temp * norm_x)
     return exp / np.sum(exp, axis=0, keepdims=True)
 
 
-def _motif_name_sort_key(data):
+def _motif_name_sort_key(data: Tuple[str, Any]) -> Union[Tuple[int], Tuple[int, str]]:
+    """Generate sort key for TF-MoDISCo motif names.
+
+    This function creates a sort key that orders motifs by pattern number,
+    with non-standard patterns sorted to the beginning.
+
+    Parameters
+    ----------
+    data : Tuple[str, Any]
+        Tuple containing motif name as first element and additional data.
+ The motif name should follow the format 'pattern_N' where N is an integer. + + Returns + ------- + Union[Tuple[int], Tuple[int, str]] + Sort key tuple for ordering motifs. Standard pattern names return + (pattern_number,) while non-standard names return (-1, name). + + Notes + ----- + This function is used internally by load_modisco_motifs to ensure + consistent motif ordering across runs. + """ name = data[0] if name.startswith("pattern_"): pattern_num = int(name.split("_")[-1]) return (pattern_num,) else: - return (-1, name) + return (-1, name) # Mixed tuple types for sorting + + +MODISCO_PATTERN_GROUPS = ["pos_patterns", "neg_patterns"] + + +def load_modisco_motifs( + modisco_h5_path: str, + trim_coords: Optional[Dict[str, Tuple[int, int]]], + trim_thresholds: Optional[Dict[str, float]], + trim_threshold_default: float, + motif_type: str, + motifs_include: Optional[List[str]], + motif_name_map: Optional[Dict[str, str]], + motif_lambdas: Optional[Dict[str, float]], + motif_lambda_default: float, + include_rc: bool, +) -> Tuple[pl.DataFrame, Float[ndarray, "M 4 W"], Int[ndarray, "M W"], ndarray]: + """Load motif data from TF-MoDISCo HDF5 file with customizable processing options. + + This function extracts contribution weight matrices and associated metadata from + TF-MoDISCo results, with support for custom naming, trimming, and regularization + parameters. + + Parameters + ---------- + modisco_h5_path : str + Path to TF-MoDISCo HDF5 results file containing pattern groups. + trim_coords : Optional[Dict[str, Tuple[int, int]]] + Manual trim coordinates for specific motifs {motif_name: (start, end)}. + Takes precedence over automatic trimming based on thresholds. + trim_thresholds : Optional[Dict[str, float]] + Custom trim thresholds for specific motifs {motif_name: threshold}. + Values should be between 0.0 and 1.0. + trim_threshold_default : float + Default trim threshold for motifs not in trim_thresholds. + Fraction of maximum contribution used for trimming. 
+ motif_type : str + Type of motif to extract. Must be one of: + - 'cwm': Contribution weight matrix (normalized) + - 'hcwm': Hypothetical contribution weight matrix + - 'pfm': Position frequency matrix + - 'pfm_softmax': Softmax-transformed position frequency matrix + motifs_include : Optional[List[str]] + List of motif names to include. If None, includes all motifs found. + Names should follow format 'pos_patterns.pattern_N' or 'neg_patterns.pattern_N'. + motif_name_map : Optional[Dict[str, str]] + Mapping from original to custom motif names {orig_name: new_name}. + New names must be unique across all motifs. + motif_lambdas : Optional[Dict[str, float]] + Custom lambda regularization values for specific motifs {motif_name: lambda}. + Higher values increase sparsity penalty for the corresponding motif. + motif_lambda_default : float + Default lambda value for motifs not specified in motif_lambdas. + include_rc : bool + Whether to include reverse complement motifs in addition to forward motifs. + If True, doubles the number of motifs returned. + + Returns + ------- + motifs_df : pl.DataFrame + DataFrame containing motif metadata with columns: motif_id, motif_name, + motif_name_orig, strand, motif_start, motif_end, motif_scale, lambda. + cwms : Float[ndarray, "M 4 W"] + Contribution weight matrices for all motifs where M is the number of motifs, + 4 represents A,C,G,T nucleotides, and W is the motif width. + trim_masks : Int[ndarray, "M W"] + Binary masks indicating core motif regions (1) vs trimmed regions (0). + Shape is (M motifs, W motif_width). + names : ndarray + Array of unique motif names (forward strand only). + + Raises + ------ + ValueError + If motif_type is not one of the supported types, or if motif names + in motif_name_map are not unique. + FileNotFoundError + If the specified HDF5 file does not exist. + KeyError + If required datasets are missing from the HDF5 file. 
+ + Notes + ----- + Motif trimming removes low-contribution positions from the edges based on + the position-wise sum of absolute contributions across nucleotides. The trimming + helps focus on the core binding site. -MODISCO_PATTERN_GROUPS = ['pos_patterns', 'neg_patterns'] - -def load_modisco_motifs(modisco_h5_path, trim_coords, trim_thresholds, trim_threshold_default, motif_type, - motifs_include, motif_name_map, motif_lambdas, motif_lambda_default, include_rc): - """ Adapted from https://github.com/jmschrei/tfmodisco-lite/blob/570535ee5ccf43d670e898d92d63af43d68c38c5/modiscolite/report.py#L252-L272 """ - motif_data_lsts = {"motif_name": [], "motif_name_orig": [], "strand": [], "motif_start": [], - "motif_end": [], "motif_scale": [], "lambda": []} - motif_lst = [] + motif_data_lsts = { + "motif_name": [], + "motif_name_orig": [], + "strand": [], + "motif_start": [], + "motif_end": [], + "motif_scale": [], + "lambda": [], + } + motif_lst = [] trim_mask_lst = [] if motifs_include is not None: - motifs_include = set(motifs_include) + motifs_include_set = set(motifs_include) + else: + motifs_include_set = None if motif_name_map is None: motif_name_map = {} @@ -276,37 +856,44 @@ def load_modisco_motifs(modisco_h5_path, trim_coords, trim_thresholds, trim_thre if len(motif_name_map.values()) != len(set(motif_name_map.values())): raise ValueError("Specified motif names are not unique") - with h5py.File(modisco_h5_path, 'r') as modisco_results: + with h5py.File(modisco_h5_path, "r") as modisco_results: for name in MODISCO_PATTERN_GROUPS: if name not in modisco_results.keys(): continue metacluster = modisco_results[name] - for ind, (pattern_name, pattern) in enumerate(sorted(metacluster.items(), key=_motif_name_sort_key)): - pattern_tag = f'{name}.{pattern_name}' - - if motifs_include is not None and pattern_tag not in motifs_include: + for _, (pattern_name, pattern) in enumerate( + sorted(metacluster.items(), key=_motif_name_sort_key) # type: ignore # HDF5 access + ): + 
pattern_tag = f"{name}.{pattern_name}" + + if ( + motifs_include_set is not None + and pattern_tag not in motifs_include_set + ): continue motif_lambda = motif_lambdas.get(pattern_tag, motif_lambda_default) pattern_tag_orig = pattern_tag pattern_tag = motif_name_map.get(pattern_tag, pattern_tag) - cwm_raw = pattern['contrib_scores'][:].T + cwm_raw = pattern["contrib_scores"][:].T # type: ignore cwm_norm = np.sqrt((cwm_raw**2).sum()) cwm_fwd = cwm_raw / cwm_norm - cwm_rev = cwm_fwd[::-1,::-1] + cwm_rev = cwm_fwd[::-1, ::-1] if pattern_tag in trim_coords: start_fwd, end_fwd = trim_coords[pattern_tag] else: - trim_threshold = trim_thresholds.get(pattern_tag, trim_threshold_default) + trim_threshold = trim_thresholds.get( + pattern_tag, trim_threshold_default + ) start_fwd, end_fwd = trim_motif(cwm_fwd, trim_threshold) cwm_len = cwm_fwd.shape[1] start_rev, end_rev = cwm_len - end_fwd, cwm_len - start_fwd - + trim_mask_fwd = np.zeros(cwm_fwd.shape[1], dtype=np.int8) trim_mask_fwd[start_fwd:end_fwd] = 1 trim_mask_rev = np.zeros(cwm_rev.shape[1], dtype=np.int8) @@ -318,29 +905,34 @@ def load_modisco_motifs(modisco_h5_path, trim_coords, trim_thresholds, trim_thre motif_norm = cwm_norm elif motif_type == "hcwm": - motif_raw = pattern['hypothetical_contribs'][:].T + motif_raw = pattern["hypothetical_contribs"][:].T # type: ignore motif_norm = np.sqrt((motif_raw**2).sum()) motif_fwd = motif_raw / motif_norm - motif_rev = motif_fwd[::-1,::-1] + motif_rev = motif_fwd[::-1, ::-1] elif motif_type == "pfm": - motif_raw = pattern['sequence'][:].T + motif_raw = pattern["sequence"][:].T # type: ignore motif_norm = 1 motif_fwd = motif_raw / np.sum(motif_raw, axis=0, keepdims=True) - motif_rev = motif_fwd[::-1,::-1] + motif_rev = motif_fwd[::-1, ::-1] elif motif_type == "pfm_softmax": - motif_raw = pattern['sequence'][:].T + motif_raw = pattern["sequence"][:].T # type: ignore motif_norm = 1 motif_fwd = softmax(motif_raw) - motif_rev = motif_fwd[::-1,::-1] + motif_rev = motif_fwd[::-1, 
::-1] + + else: + raise ValueError( + f"Invalid motif_type: {motif_type}. Must be one of 'cwm', 'hcwm', 'pfm', 'pfm_softmax'." + ) motif_data_lsts["motif_name"].append(pattern_tag) motif_data_lsts["motif_name_orig"].append(pattern_tag_orig) - motif_data_lsts["strand"].append('+') + motif_data_lsts["strand"].append("+") motif_data_lsts["motif_start"].append(start_fwd) motif_data_lsts["motif_end"].append(end_fwd) motif_data_lsts["motif_scale"].append(motif_norm) @@ -349,7 +941,7 @@ def load_modisco_motifs(modisco_h5_path, trim_coords, trim_thresholds, trim_thre if include_rc: motif_data_lsts["motif_name"].append(pattern_tag) motif_data_lsts["motif_name_orig"].append(pattern_tag_orig) - motif_data_lsts["strand"].append('-') + motif_data_lsts["strand"].append("-") motif_data_lsts["motif_start"].append(start_rev) motif_data_lsts["motif_end"].append(end_rev) motif_data_lsts["motif_scale"].append(motif_norm) @@ -361,17 +953,75 @@ def load_modisco_motifs(modisco_h5_path, trim_coords, trim_thresholds, trim_thre else: motif_lst.append(motif_fwd) trim_mask_lst.append(trim_mask_fwd) - + motifs_df = pl.DataFrame(motif_data_lsts).with_row_index(name="motif_id") cwms = np.stack(motif_lst, dtype=np.float16, axis=0) trim_masks = np.stack(trim_mask_lst, dtype=np.int8, axis=0) - names = motifs_df.filter(pl.col("strand") == "+").get_column("motif_name").to_numpy() + names = ( + motifs_df.filter(pl.col("strand") == "+").get_column("motif_name").to_numpy() + ) return motifs_df, cwms, trim_masks, names -def load_modisco_seqlets(modisco_h5_path, peaks_df, motifs_df, half_width, modisco_half_width, lazy=False): - +def load_modisco_seqlets( + modisco_h5_path: str, + peaks_df: pl.DataFrame, + motifs_df: pl.DataFrame, + half_width: int, + modisco_half_width: int, + lazy: bool = False, +) -> Union[pl.DataFrame, pl.LazyFrame]: + """Load seqlet data from TF-MoDISCo HDF5 file and convert to genomic coordinates. 
+ + This function extracts seqlet instances from TF-MoDISCo results and converts + their relative positions to absolute genomic coordinates using peak region + information. + + Parameters + ---------- + modisco_h5_path : str + Path to TF-MoDISCo HDF5 results file containing seqlet data. + peaks_df : pl.DataFrame + DataFrame containing peak region information with columns: + 'peak_id', 'chr', 'chr_id', 'peak_region_start'. + motifs_df : pl.DataFrame + DataFrame containing motif metadata with columns: + 'motif_name_orig', 'strand', 'motif_name', 'motif_start', 'motif_end'. + half_width : int + Half-width of the current analysis regions. + modisco_half_width : int + Half-width of the regions used in the original TF-MoDISCo analysis. + Used to calculate coordinate offsets. + lazy : bool, default False + If True, returns a LazyFrame for efficient chaining of operations. + If False, collects the result into a DataFrame. + + Returns + ------- + Union[pl.DataFrame, pl.LazyFrame] + Seqlets with genomic coordinates containing columns: + - chr: Chromosome name + - chr_id: Numeric chromosome identifier + - start: Start coordinate of trimmed motif instance + - end: End coordinate of trimmed motif instance + - start_untrimmed: Start coordinate of full motif instance + - end_untrimmed: End coordinate of full motif instance + - is_revcomp: Whether the motif is reverse complemented + - strand: Motif strand ('+' or '-') + - motif_name: Motif name (may be remapped) + - peak_id: Peak identifier + - peak_region_start: Peak region start coordinate + + Notes + ----- + Seqlets are deduplicated based on chromosome ID, start position (untrimmed), + motif name, and reverse complement status to avoid redundant instances. + + The coordinate transformation accounts for differences in region sizes + between the original TF-MoDISCo analysis and the current analysis. 
+ """ + start_lst = [] end_lst = [] is_revcomp_lst = [] @@ -379,23 +1029,29 @@ def load_modisco_seqlets(modisco_h5_path, peaks_df, motifs_df, half_width, modis peak_id_lst = [] pattern_tags = [] - with h5py.File(modisco_h5_path, 'r') as modisco_results: + with h5py.File(modisco_h5_path, "r") as modisco_results: for name in MODISCO_PATTERN_GROUPS: if name not in modisco_results.keys(): continue metacluster = modisco_results[name] - key = lambda x: int(x[0].split("_")[-1]) - for ind, (pattern_name, pattern) in enumerate(sorted(metacluster.items(), key=key)): - pattern_tag = f'{name}.{pattern_name}' - starts = pattern['seqlets/start'][:].astype(np.int32) - ends = pattern['seqlets/end'][:].astype(np.int32) - is_revcomps = pattern['seqlets/is_revcomp'][:].astype(bool) - strands = ['+' if not i else '-' for i in is_revcomps] - peak_ids = pattern['seqlets/example_idx'][:].astype(np.uint32) + def get_pattern_number(x): + return int(x[0].split("_")[-1]) - n_seqlets = int(pattern['seqlets/n_seqlets'][0]) + key = get_pattern_number + for _, (pattern_name, pattern) in enumerate( + sorted(metacluster.items(), key=key) # type: ignore # HDF5 access + ): + pattern_tag = f"{name}.{pattern_name}" + + starts = pattern["seqlets/start"][:].astype(np.int32) # type: ignore + ends = pattern["seqlets/end"][:].astype(np.int32) # type: ignore + is_revcomps = pattern["seqlets/is_revcomp"][:].astype(bool) # type: ignore + strands = ["+" if not i else "-" for i in is_revcomps] + peak_ids = pattern["seqlets/example_idx"][:].astype(np.uint32) # type: ignore + + n_seqlets = int(pattern["seqlets/n_seqlets"][0]) # type: ignore start_lst.append(starts) end_lst.append(ends) @@ -412,7 +1068,7 @@ def load_modisco_seqlets(modisco_h5_path, peaks_df, motifs_df, half_width, modis "peak_id": np.concatenate(peak_id_lst), "motif_name_orig": pattern_tags, } - + offset = half_width - modisco_half_width seqlets_df = ( @@ -422,15 +1078,23 @@ def load_modisco_seqlets(modisco_h5_path, peaks_df, motifs_df, 
half_width, modis .select( chr=pl.col("chr"), chr_id=pl.col("chr_id"), - start=pl.col("peak_region_start") + pl.col("seqlet_start") + pl.col("motif_start") + offset, - end=pl.col("peak_region_start") + pl.col("seqlet_start") + pl.col("motif_end") + offset, - start_untrimmed=pl.col("peak_region_start") + pl.col("seqlet_start") + offset, + start=pl.col("peak_region_start") + + pl.col("seqlet_start") + + pl.col("motif_start") + + offset, + end=pl.col("peak_region_start") + + pl.col("seqlet_start") + + pl.col("motif_end") + + offset, + start_untrimmed=pl.col("peak_region_start") + + pl.col("seqlet_start") + + offset, end_untrimmed=pl.col("peak_region_start") + pl.col("seqlet_end") + offset, is_revcomp=pl.col("is_revcomp"), strand=pl.col("strand"), motif_name=pl.col("motif_name"), peak_id=pl.col("peak_id"), - peak_region_start=pl.col("peak_region_start") + peak_region_start=pl.col("peak_region_start"), ) .unique(subset=["chr_id", "start_untrimmed", "motif_name", "is_revcomp"]) ) @@ -440,8 +1104,27 @@ def load_modisco_seqlets(modisco_h5_path, peaks_df, motifs_df, half_width, modis return seqlets_df -def write_modisco_seqlets(seqlets_df, out_path): +def write_modisco_seqlets( + seqlets_df: Union[pl.DataFrame, pl.LazyFrame], out_path: str +) -> None: + """Write TF-MoDISCo seqlets to TSV file. + + Parameters + ---------- + seqlets_df : Union[pl.DataFrame, pl.LazyFrame] + Seqlets DataFrame with genomic coordinates. Must contain columns + that are safe to drop: 'chr_id', 'is_revcomp'. + out_path : str + Output TSV file path. + + Notes + ----- + Removes internal columns 'chr_id' and 'is_revcomp' before writing + to create a clean output format suitable for downstream analysis. 
+ """ seqlets_df = seqlets_df.drop(["chr_id", "is_revcomp"]) + if isinstance(seqlets_df, pl.LazyFrame): + seqlets_df = seqlets_df.collect() seqlets_df.write_csv(out_path, separator="\t") @@ -465,71 +1148,144 @@ def write_modisco_seqlets(seqlets_df, out_path): HITS_COLLAPSED_DTYPES = HITS_DTYPES | {"is_primary": pl.UInt32} -def load_hits(hits_path, lazy=False, schema=HITS_DTYPES): - hits_df = ( - pl.scan_csv(hits_path, separator='\t', quote_char=None, schema=schema) - .with_columns(pl.lit(1).alias("count")) - ) +def load_hits( + hits_path: str, lazy: bool = False, schema: Dict[str, Any] = HITS_DTYPES +) -> Union[pl.DataFrame, pl.LazyFrame]: + """Load motif hit data from TSV file. + + Parameters + ---------- + hits_path : str + Path to TSV file containing motif hit results. + lazy : bool, default False + If True, returns a LazyFrame for efficient chaining operations. + If False, collects the result into a DataFrame. + schema : Dict[str, Any], default HITS_DTYPES + Schema defining column names and data types for the hit data. + + Returns + ------- + Union[pl.DataFrame, pl.LazyFrame] + Hit data with an additional 'count' column set to 1 for aggregation. + """ + hits_df = pl.scan_csv( + hits_path, separator="\t", quote_char=None, schema=schema + ).with_columns(pl.lit(1).alias("count")) return hits_df if lazy else hits_df.collect() -def write_hits_processed(hits_df, out_path, schema=HITS_DTYPES): +def write_hits_processed( + hits_df: Union[pl.DataFrame, pl.LazyFrame], + out_path: str, + schema: Optional[Dict[str, Any]] = HITS_DTYPES, +) -> None: + """Write processed hit data to TSV file with optional column filtering. + + Parameters + ---------- + hits_df : Union[pl.DataFrame, pl.LazyFrame] + Hit data to write to file. + out_path : str + Output path for the TSV file. + schema : Optional[Dict[str, Any]], default HITS_DTYPES + Schema defining which columns to include in output. + If None, all columns are written. 
+ """ if schema is not None: hits_df = hits_df.select(schema.keys()) + + if isinstance(hits_df, pl.LazyFrame): + hits_df = hits_df.collect() + hits_df.write_csv(out_path, separator="\t") -def write_hits(hits_df, peaks_df, motifs_df, qc_df, out_dir, motif_width): +def write_hits( + hits_df: Union[pl.DataFrame, pl.LazyFrame], + peaks_df: pl.DataFrame, + motifs_df: pl.DataFrame, + qc_df: pl.DataFrame, + out_dir: str, + motif_width: int, +) -> None: + """Write comprehensive hit results to multiple output files. + + This function combines hit data with peak, motif, and quality control information + to generate complete output files including genomic coordinates and scores. + + Parameters + ---------- + hits_df : Union[pl.DataFrame, pl.LazyFrame] + Hit data containing motif instance information. + peaks_df : pl.DataFrame + Peak region information for coordinate conversion. + motifs_df : pl.DataFrame + Motif metadata for annotation and trimming information. + qc_df : pl.DataFrame + Quality control data for normalization factors. + out_dir : str + Output directory for results files. Will be created if it doesn't exist. + motif_width : int + Width of motif instances for coordinate calculations. 
+ + Notes + ----- + Creates three output files: + - hits.tsv: Complete hit data with all instances + - hits_unique.tsv: Deduplicated hits by genomic position and motif + - hits.bed: BED format file for genome browser visualization + """ os.makedirs(out_dir, exist_ok=True) out_path_tsv = os.path.join(out_dir, "hits.tsv") out_path_tsv_unique = os.path.join(out_dir, "hits_unique.tsv") out_path_bed = os.path.join(out_dir, "hits.bed") data_all = ( - hits_df - .lazy() + hits_df.lazy() .join(peaks_df.lazy(), on="peak_id", how="inner") .join(qc_df.lazy(), on="peak_id", how="inner") .join(motifs_df.lazy(), on="motif_id", how="inner") .select( chr_id=pl.col("chr_id"), chr=pl.col("chr"), - start=pl.col("peak_region_start") + pl.col("hit_start") + pl.col("motif_start"), + start=pl.col("peak_region_start") + + pl.col("hit_start") + + pl.col("motif_start"), end=pl.col("peak_region_start") + pl.col("hit_start") + pl.col("motif_end"), start_untrimmed=pl.col("peak_region_start") + pl.col("hit_start"), - end_untrimmed=pl.col("peak_region_start") + pl.col("hit_start") + motif_width, + end_untrimmed=pl.col("peak_region_start") + + pl.col("hit_start") + + motif_width, motif_name=pl.col("motif_name"), hit_coefficient=pl.col("hit_coefficient"), - hit_coefficient_global=pl.col("hit_coefficient") * (pl.col("global_scale")**2), + hit_coefficient_global=pl.col("hit_coefficient") + * (pl.col("global_scale") ** 2), hit_similarity=pl.col("hit_similarity"), hit_correlation=pl.col("hit_similarity"), hit_importance=pl.col("hit_importance") * pl.col("global_scale"), - hit_importance_sq=pl.col("hit_importance_sq") * (pl.col("global_scale")**2), + hit_importance_sq=pl.col("hit_importance_sq") + * (pl.col("global_scale") ** 2), strand=pl.col("strand"), peak_name=pl.col("peak_name"), peak_id=pl.col("peak_id"), - motif_lambda = pl.col("lambda"), + motif_lambda=pl.col("lambda"), ) .sort(["chr_id", "start"]) .select(HITS_DTYPES.keys()) ) - data_unique = ( - data_all - .unique(subset=["chr", "start", 
"motif_name", "strand"], maintain_order=True) + data_unique = data_all.unique( + subset=["chr", "start", "motif_name", "strand"], maintain_order=True ) - data_bed = ( - data_unique - .select( - chr=pl.col("chr"), - start=pl.col("start"), - end=pl.col("end"), - motif_name=pl.col("motif_name"), - score=pl.lit(0), - strand=pl.col("strand") - ) + data_bed = data_unique.select( + chr=pl.col("chr"), + start=pl.col("start"), + end=pl.col("end"), + motif_name=pl.col("motif_name"), + score=pl.lit(0), + strand=pl.col("strand"), ) data_all.collect().write_csv(out_path_tsv, separator="\t") @@ -537,10 +1293,20 @@ def write_hits(hits_df, peaks_df, motifs_df, qc_df, out_dir, motif_width): data_bed.collect().write_csv(out_path_bed, include_header=False, separator="\t") -def write_qc(qc_df, peaks_df, out_path): +def write_qc(qc_df: pl.DataFrame, peaks_df: pl.DataFrame, out_path: str) -> None: + """Write quality control data with peak information to TSV file. + + Parameters + ---------- + qc_df : pl.DataFrame + Quality control metrics for each peak region. + peaks_df : pl.DataFrame + Peak region information for coordinate annotation. + out_path : str + Output path for the TSV file. + """ df = ( - qc_df - .lazy() + qc_df.lazy() .join(peaks_df.lazy(), on="peak_id", how="inner") .sort(["chr_id", "peak_region_start"]) .drop("chr_id") @@ -549,7 +1315,16 @@ def write_qc(qc_df, peaks_df, out_path): df.write_csv(out_path, separator="\t") -def write_motifs_df(motifs_df, out_path): +def write_motifs_df(motifs_df: pl.DataFrame, out_path: str) -> None: + """Write motif metadata to TSV file. + + Parameters + ---------- + motifs_df : pl.DataFrame + Motif metadata DataFrame. + out_path : str + Output path for the TSV file. 
+ """ motifs_df.write_csv(out_path, separator="\t") @@ -564,42 +1339,132 @@ def write_motifs_df(motifs_df, out_path): "lambda": pl.Float32, } -def load_motifs_df(motifs_path): + +def load_motifs_df(motifs_path: str) -> Tuple[pl.DataFrame, ndarray]: + """Load motif metadata from TSV file. + + Parameters + ---------- + motifs_path : str + Path to motif metadata TSV file. + + Returns + ------- + motifs_df : pl.DataFrame + Motif metadata with predefined schema. + motif_names : ndarray + Array of unique forward-strand motif names. + """ motifs_df = pl.read_csv(motifs_path, separator="\t", schema=MOTIF_DTYPES) - motif_names = motifs_df.filter(pl.col("strand") == "+").get_column("motif_name").to_numpy() + motif_names = ( + motifs_df.filter(pl.col("strand") == "+").get_column("motif_name").to_numpy() + ) return motifs_df, motif_names -def write_motif_cwms(cwms, out_path): +def write_motif_cwms(cwms: Float[ndarray, "M 4 W"], out_path: str) -> None: + """Write motif contribution weight matrices to .npy file. + + Parameters + ---------- + cwms : Float[ndarray, "M 4 W"] + Contribution weight matrices for M motifs, 4 nucleotides, W width. + out_path : str + Output path for the .npy file. + """ np.save(out_path, cwms) -def load_motif_cwms(cwms_path): +def load_motif_cwms(cwms_path: str) -> Float[ndarray, "M 4 W"]: + """Load motif contribution weight matrices from .npy file. + + Parameters + ---------- + cwms_path : str + Path to .npy file containing CWMs. + + Returns + ------- + Float[ndarray, "M 4 W"] + Loaded contribution weight matrices. + """ return np.load(cwms_path) -def write_params(params, out_path): +def write_params(params: Dict[str, Any], out_path: str) -> None: + """Write parameter dictionary to JSON file. + + Parameters + ---------- + params : Dict[str, Any] + Parameter dictionary to serialize. + out_path : str + Output path for the JSON file. 
+ """ with open(out_path, "w") as f: json.dump(params, f, indent=4) -def load_params(params_path): +def load_params(params_path: str) -> Dict[str, Any]: + """Load parameter dictionary from JSON file. + + Parameters + ---------- + params_path : str + Path to JSON file containing parameters. + + Returns + ------- + Dict[str, Any] + Loaded parameter dictionary. + """ with open(params_path) as f: params = json.load(f) return params -def write_occ_df(occ_df, out_path): +def write_occ_df(occ_df: pl.DataFrame, out_path: str) -> None: + """Write occurrence data to TSV file. + + Parameters + ---------- + occ_df : pl.DataFrame + Occurrence data DataFrame. + out_path : str + Output path for the TSV file. + """ occ_df.write_csv(out_path, separator="\t") -def write_seqlet_confusion_df(seqlet_confusion_df, out_path): +def write_seqlet_confusion_df(seqlet_confusion_df: pl.DataFrame, out_path: str) -> None: + """Write seqlet confusion matrix data to TSV file. + + Parameters + ---------- + seqlet_confusion_df : pl.DataFrame + Seqlet confusion matrix DataFrame. + out_path : str + Output path for the TSV file. + """ seqlet_confusion_df.write_csv(out_path, separator="\t") -def write_report_data(report_df, cwms, out_dir): +def write_report_data( + report_df: pl.DataFrame, cwms: Dict[str, Dict[str, ndarray]], out_dir: str +) -> None: + """Write comprehensive motif report data including CWMs and metadata. + + Parameters + ---------- + report_df : pl.DataFrame + Report metadata DataFrame. + cwms : Dict[str, Dict[str, ndarray]] + Nested dictionary of motif names to CWM types to arrays. + out_dir : str + Output directory for report files. 
+ """ cwms_dir = os.path.join(out_dir, "CWMs") os.makedirs(cwms_dir, exist_ok=True) @@ -610,4 +1475,3 @@ def write_report_data(report_df, cwms, out_dir): np.savetxt(os.path.join(motif_dir, f"{cwm_type}.txt"), cwm) report_df.write_csv(os.path.join(out_dir, "motif_report.tsv"), separator="\t") - diff --git a/src/finemo/evaluation.py b/src/finemo/evaluation.py index 98bb919..61a89b2 100644 --- a/src/finemo/evaluation.py +++ b/src/finemo/evaluation.py @@ -1,21 +1,69 @@ +"""Evaluation module for assessing Fi-NeMo motif discovery and hit calling performance. + +This module provides functions for: +- Computing motif occurrence statistics and co-occurrence patterns +- Evaluating motif discovery quality against TF-MoDISCo results +- Analyzing hit calling performance and recall metrics +- Generating confusion matrices for seqlet-hit comparisons +""" + import warnings +from typing import List, Tuple, Dict, Any, Union import numpy as np +from numpy import ndarray import polars as pl - - -def get_motif_occurences(hits_df, motif_names): +from jaxtyping import Float, Int + + +def get_motif_occurences( + hits_df: pl.LazyFrame, motif_names: List[str] +) -> Tuple[pl.DataFrame, Int[ndarray, "M M"]]: + """Compute motif occurrence statistics and co-occurrence matrix. + + This function analyzes motif occurrence patterns across peaks by creating + a pivot table of hit counts and computing pairwise co-occurrence statistics. + + Parameters + ---------- + hits_df : pl.LazyFrame + Lazy DataFrame containing hit data with required columns: + - peak_id : Peak identifier + - motif_name : Name of the motif + Additional columns are ignored. + motif_names : List[str] + List of motif names to include in analysis. Missing motifs + will be added as columns with zero counts. + + Returns + ------- + occ_df : pl.DataFrame + DataFrame with motif occurrence counts per peak. 
Contains: + - peak_id column + - One column per motif with hit counts + - 'total' column summing all motif counts per peak + coocc : Int[ndarray, "M M"] + Co-occurrence matrix where M = len(motif_names). + Entry (i,j) indicates number of peaks containing both motif i and motif j. + Diagonal entries show total peaks containing each motif. + + Notes + ----- + The co-occurrence matrix is computed using binary occurrence indicators, + so multiple hits of the same motif in a peak are treated as a single occurrence. + """ occ_df = ( - hits_df - .collect() - .pivot(index="peak_id", columns="motif_name", values="count", aggregate_function="sum") + hits_df.collect() + .with_columns(pl.lit(1).alias("count")) + .pivot( + on="motif_name", index="peak_id", values="count", aggregate_function="sum" + ) .fill_null(0) ) missing_cols = set(motif_names) - set(occ_df.columns) occ_df = ( - occ_df - .with_columns([pl.lit(0).alias(m) for m in missing_cols]) + occ_df.with_columns([pl.lit(0).alias(m) for m in missing_cols]) .with_columns(total=pl.sum_horizontal(*motif_names)) .sort(["peak_id"]) ) @@ -25,7 +73,7 @@ def get_motif_occurences(hits_df, motif_names): occ_mat = np.zeros((num_peaks, num_motifs), dtype=np.int16) for i, m in enumerate(motif_names): - occ_mat[:,i] = occ_df.get_column(m).to_numpy() + occ_mat[:, i] = occ_df.get_column(m).to_numpy() occ_bin = (occ_mat > 0).astype(np.int32) coocc = occ_bin.T @ occ_bin @@ -33,113 +81,250 @@ def get_motif_occurences(hits_df, motif_names): return occ_df, coocc -def get_cwms(regions, positions_df, motif_width): - idx_df = ( - positions_df - .select( - peak_idx=pl.col("peak_id"), - start_idx=pl.col("start_untrimmed") - pl.col("peak_region_start"), - is_revcomp=pl.col("is_revcomp") - ) +def get_cwms( + regions: Float[ndarray, "N 4 L"], positions_df: pl.DataFrame, motif_width: int +) -> Float[ndarray, "H 4 W"]: + """Extract contribution weight matrices from regions based on hit positions. 
+ + This function extracts motif-sized windows from contribution score regions + at positions specified by hit coordinates. It handles both forward and + reverse complement orientations and filters out invalid positions. + + Parameters + ---------- + regions : Float[ndarray, "N 4 L"] + Input contribution score regions multiplied by one-hot sequences. + Shape: (n_peaks, 4, region_width) where 4 represents DNA bases (A,C,G,T). + positions_df : pl.DataFrame + DataFrame containing hit positions with required columns: + - peak_id : int, Peak index (0-based) + - start_untrimmed : int, Start position in genomic coordinates + - peak_region_start : int, Peak region start coordinate + - is_revcomp : bool, Whether hit is on reverse complement strand + motif_width : int + Width of motifs to extract. Must be positive. + + Returns + ------- + cwms : Float[ndarray, "H 4 W"] + Extracted contribution weight matrices for valid hits. + Shape: (n_valid_hits, 4, motif_width) + Invalid hits (outside region boundaries) are filtered out. + + Notes + ----- + - Start positions are converted from genomic to region-relative coordinates + - Reverse complement hits have their sequence order reversed + - Hits extending beyond region boundaries are excluded + - The mean is computed across all valid hits, with warnings suppressed + for empty slices or invalid operations + + Raises + ------ + ValueError + If motif_width is non-positive or positions_df lacks required columns. 
+ """ + idx_df = positions_df.select( + peak_idx=pl.col("peak_id"), + start_idx=pl.col("start_untrimmed") - pl.col("peak_region_start"), + is_revcomp=pl.col("is_revcomp"), ) - peak_idx = idx_df.get_column('peak_idx').to_numpy() - start_idx = idx_df.get_column('start_idx').to_numpy() + peak_idx = idx_df.get_column("peak_idx").to_numpy() + start_idx = idx_df.get_column("start_idx").to_numpy() is_revcomp = idx_df.get_column("is_revcomp").to_numpy().astype(bool) - # Ignore hits outside of region + # Filter hits that fall outside the region boundaries valid_mask = (start_idx >= 0) & (start_idx + motif_width <= regions.shape[2]) peak_idx = peak_idx[valid_mask] start_idx = start_idx[valid_mask] is_revcomp = is_revcomp[valid_mask] - row_idx = peak_idx[:,None,None] - pos_idx = start_idx[:,None,None] + np.zeros((1,1,motif_width), dtype=int) - pos_idx[~is_revcomp,:,:] += np.arange(motif_width)[None,None,:] - pos_idx[is_revcomp,:,:] += np.arange(motif_width)[None,None,::-1] - nuc_idx = np.zeros((peak_idx.shape[0],4,1), dtype=int) - nuc_idx[~is_revcomp,:,:] += np.arange(4)[None,:,None] - nuc_idx[is_revcomp,:,:] += np.arange(4)[None,::-1,None] + row_idx = peak_idx[:, None, None] + pos_idx = start_idx[:, None, None] + np.zeros((1, 1, motif_width), dtype=int) + pos_idx[~is_revcomp, :, :] += np.arange(motif_width)[None, None, :] + pos_idx[is_revcomp, :, :] += np.arange(motif_width)[None, None, ::-1] + nuc_idx = np.zeros((peak_idx.shape[0], 4, 1), dtype=int) + nuc_idx[~is_revcomp, :, :] += np.arange(4)[None, :, None] + nuc_idx[is_revcomp, :, :] += np.arange(4)[None, ::-1, None] seqs = regions[row_idx, nuc_idx, pos_idx] - + with warnings.catch_warnings(): - warnings.filterwarnings(action='ignore', message='invalid value encountered in divide') - warnings.filterwarnings(action='ignore', message='Mean of empty slice') + warnings.filterwarnings( + action="ignore", message="invalid value encountered in divide" + ) + warnings.filterwarnings(action="ignore", message="Mean of empty slice") 
cwms = seqs.mean(axis=0) return cwms -def tfmodisco_comparison(regions, hits_df, peaks_df, seqlets_df, motifs_df, cwms_modisco, - motif_names, modisco_half_width, motif_width, compute_recall): +def tfmodisco_comparison( + regions: Float[ndarray, "N 4 L"], + hits_df: Union[pl.DataFrame, pl.LazyFrame], + peaks_df: pl.DataFrame, + seqlets_df: Union[pl.DataFrame, pl.LazyFrame, None], + motifs_df: pl.DataFrame, + cwms_modisco: Float[ndarray, "M 4 W"], + motif_names: List[str], + modisco_half_width: int, + motif_width: int, + compute_recall: bool, +) -> Tuple[ + Dict[str, Dict[str, Any]], + pl.DataFrame, + Dict[str, Dict[str, Float[ndarray, "4 W"]]], + Dict[str, Dict[str, Tuple[int, int]]], +]: + """Compare Fi-NeMo hits with TF-MoDISCo seqlets and compute evaluation metrics. + + This function performs comprehensive comparison between Fi-NeMo hit calls + and TF-MoDISCo seqlets, computing recall metrics, CWM similarities, + and extracting contribution weight matrices for visualization. + + Parameters + ---------- + regions : Float[ndarray, "N 4 L"] + Contribution score regions multiplied by one-hot sequences. + Shape: (n_peaks, 4, region_length) + hits_df : Union[pl.DataFrame, pl.LazyFrame] + Fi-NeMo hit calls with required columns: + - peak_id, start_untrimmed, end_untrimmed, strand, motif_name + peaks_df : pl.DataFrame + Peak metadata with columns: + - peak_id, chr_id, peak_region_start + seqlets_df : Optional[pl.DataFrame] + TF-MoDISCo seqlets with columns: + - chr_id, start_untrimmed, is_revcomp, motif_name + If None, only basic hit statistics are computed. + motifs_df : pl.DataFrame + Motif metadata with columns: + - motif_name, strand, motif_id, motif_start, motif_end + cwms_modisco : Float[ndarray, "M 4 W"] + TF-MoDISCo contribution weight matrices. + Shape: (n_modisco_motifs, 4, motif_width) + motif_names : List[str] + Names of motifs to analyze. + modisco_half_width : int + Half-width for restricting hits to central region for fair comparison. 
+ motif_width : int + Width of motifs for CWM extraction. + compute_recall : bool + Whether to compute recall metrics requiring seqlets_df. + + Returns + ------- + report_data : Dict[str, Dict[str, Any]] + Per-motif evaluation metrics including: + - num_hits_total, num_hits_restricted, num_seqlets + - num_overlaps, seqlet_recall, cwm_similarity + report_df : pl.DataFrame + Tabular format of report_data for easy analysis. + cwms : Dict[str, Dict[str, Float[ndarray, "4 W"]]] + Extracted CWMs for each motif and condition: + - hits_fc, hits_rc: Forward/reverse complement hits + - modisco_fc, modisco_rc: TF-MoDISCo forward/reverse + - seqlets_only, hits_restricted_only: Non-overlapping instances + cwm_trim_bounds : Dict[str, Dict[str, Tuple[int, int]]] + Trimming boundaries for each CWM type and motif. + + Notes + ----- + - Hits are filtered to central region defined by modisco_half_width + - CWM similarity is computed as normalized dot product between hit and TF-MoDISCo CWMs + - Recall metrics require both hits_df and seqlets_df to be non-empty + - Missing motifs are handled gracefully with empty DataFrames + + Raises + ------ + ValueError + If required columns are missing from input DataFrames. 
+ """ + + # Ensure hits_df is LazyFrame for consistent operations + if isinstance(hits_df, pl.DataFrame): + hits_df = hits_df.lazy() + hits_df = ( - hits_df - .with_columns(pl.col('peak_id').cast(pl.UInt32)) - .join( - peaks_df.lazy(), on="peak_id", how="inner" - ) + hits_df.with_columns(pl.col("peak_id").cast(pl.UInt32)) + .join(peaks_df.lazy(), on="peak_id", how="inner") .select( chr_id=pl.col("chr_id"), start_untrimmed=pl.col("start_untrimmed"), end_untrimmed=pl.col("end_untrimmed"), - is_revcomp=pl.col("strand") == '-', + is_revcomp=pl.col("strand") == "-", motif_name=pl.col("motif_name"), peak_region_start=pl.col("peak_region_start"), - peak_id=pl.col("peak_id") + peak_id=pl.col("peak_id"), ) ) - hits_unique = hits_df.unique(subset=["chr_id", "start_untrimmed", "motif_name", "is_revcomp"]) - + hits_unique = hits_df.unique( + subset=["chr_id", "start_untrimmed", "motif_name", "is_revcomp"] + ) + region_len = regions.shape[2] center = region_len / 2 - hits_filtered = ( - hits_df - .filter( - ((pl.col("start_untrimmed") - pl.col("peak_region_start")) >= (center - modisco_half_width)) - & ((pl.col("end_untrimmed") - pl.col("peak_region_start")) <= (center + modisco_half_width)) + hits_filtered = hits_df.filter( + ( + (pl.col("start_untrimmed") - pl.col("peak_region_start")) + >= (center - modisco_half_width) ) - .unique(subset=["chr_id", "start_untrimmed", "motif_name", "is_revcomp"]) - ) - - if compute_recall: - overlaps_df = ( - hits_filtered.join( - seqlets_df, - on=["chr_id", "start_untrimmed", "is_revcomp", "motif_name"], - how="inner", - ) - .collect() + & ( + (pl.col("end_untrimmed") - pl.col("peak_region_start")) + <= (center + modisco_half_width) ) + ).unique(subset=["chr_id", "start_untrimmed", "motif_name", "is_revcomp"]) - seqlets_only_df = ( - seqlets_df.join( - hits_df, - on=["chr_id", "start_untrimmed", "is_revcomp", "motif_name"], - how="anti", - ) - .collect() - ) + hits_by_motif = hits_unique.collect().partition_by("motif_name", as_dict=True) + 
hits_filtered_by_motif = hits_filtered.collect().partition_by( + "motif_name", as_dict=True + ) - hits_only_filtered_df = ( - hits_filtered.join( - seqlets_df, - on=["chr_id", "start_untrimmed", "is_revcomp", "motif_name"], - how="anti", - ) - .collect() - ) + if seqlets_df is None: + seqlets_collected = None + seqlets_lazy = None + elif isinstance(seqlets_df, pl.LazyFrame): + seqlets_collected = seqlets_df.collect() + seqlets_lazy = seqlets_df + else: + seqlets_collected = seqlets_df + seqlets_lazy = seqlets_df.lazy() + + if seqlets_collected is not None: + seqlets_by_motif = seqlets_collected.partition_by("motif_name", as_dict=True) + else: + seqlets_by_motif = {} + + if compute_recall and seqlets_lazy is not None: + overlaps_df = hits_filtered.join( + seqlets_lazy, + on=["chr_id", "start_untrimmed", "is_revcomp", "motif_name"], + how="inner", + ).collect() - hits_by_motif = hits_unique.collect().partition_by("motif_name", as_dict=True) - hits_fitered_by_motif = hits_filtered.collect().partition_by("motif_name", as_dict=True) + seqlets_only_df = seqlets_lazy.join( + hits_df, + on=["chr_id", "start_untrimmed", "is_revcomp", "motif_name"], + how="anti", + ).collect() - if seqlets_df is not None: - seqlets_by_motif = seqlets_df.collect().partition_by("motif_name", as_dict=True) + hits_only_filtered_df = hits_filtered.join( + seqlets_lazy, + on=["chr_id", "start_untrimmed", "is_revcomp", "motif_name"], + how="anti", + ).collect() - if compute_recall: + # Create partition dictionaries overlaps_by_motif = overlaps_df.partition_by("motif_name", as_dict=True) seqlets_only_by_motif = seqlets_only_df.partition_by("motif_name", as_dict=True) - hits_only_filtered_by_motif = hits_only_filtered_df.partition_by("motif_name", as_dict=True) + hits_only_filtered_by_motif = hits_only_filtered_df.partition_by( + "motif_name", as_dict=True + ) + else: + overlaps_by_motif = {} + seqlets_only_by_motif = {} + hits_only_filtered_by_motif = {} report_data = {} cwms = {} @@ -147,12 +332,18 
@@ def tfmodisco_comparison(regions, hits_df, peaks_df, seqlets_df, motifs_df, cwms dummy_df = hits_df.clear().collect() for m in motif_names: hits = hits_by_motif.get((m,), dummy_df) - hits_filtered = hits_fitered_by_motif.get((m,), dummy_df) + hits_filtered = hits_filtered_by_motif.get((m,), dummy_df) + + # Initialize default values + seqlets = dummy_df + overlaps = dummy_df + seqlets_only = dummy_df + hits_only_filtered = dummy_df if seqlets_df is not None: seqlets = seqlets_by_motif.get((m,), dummy_df) - if compute_recall: + if compute_recall and seqlets_df is not None: overlaps = overlaps_by_motif.get((m,), dummy_df) seqlets_only = seqlets_only_by_motif.get((m,), dummy_df) hits_only_filtered = hits_only_filtered_by_motif.get((m,), dummy_df) @@ -165,48 +356,56 @@ def tfmodisco_comparison(regions, hits_df, peaks_df, seqlets_df, motifs_df, cwms if seqlets_df is not None: report_data[m]["num_seqlets"] = seqlets.height - if compute_recall: + if compute_recall and seqlets_df is not None: report_data[m] |= { "num_overlaps": overlaps.height, "num_seqlets_only": seqlets_only.height, "num_hits_restricted_only": hits_only_filtered.height, "seqlet_recall": np.float64(overlaps.height) / seqlets.height + if seqlets.height > 0 + else 0.0, } - motif_data_fc = motifs_df.row(by_predicate=(pl.col("motif_name") == m) - & (pl.col("strand") == "+"), named=True) - motif_data_rc = motifs_df.row(by_predicate=(pl.col("motif_name") == m) - & (pl.col("strand") == "-"), named=True) + motif_data_fc = motifs_df.row( + by_predicate=(pl.col("motif_name") == m) & (pl.col("strand") == "+"), + named=True, + ) + motif_data_rc = motifs_df.row( + by_predicate=(pl.col("motif_name") == m) & (pl.col("strand") == "-"), + named=True, + ) cwms[m] = { "hits_fc": get_cwms(regions, hits, motif_width), "modisco_fc": cwms_modisco[motif_data_fc["motif_id"]], "modisco_rc": cwms_modisco[motif_data_rc["motif_id"]], } - cwms[m]["hits_rc"] = cwms[m]["hits_fc"][::-1,::-1] + cwms[m]["hits_rc"] = 
def seqlet_confusion(hits_df, seqlets_df, peaks_df, motif_names, motif_width):
    """Compute a confusion matrix between TF-MoDISCo seqlets and Fi-NeMo hits.

    Overlap frequencies are estimated by binning genomic coordinates to
    ``motif_width`` resolution and counting exact bin matches
    (same ``chr_id``, ``start_bin``, ``end_bin``).

    Parameters
    ----------
    hits_df : pl.DataFrame or pl.LazyFrame
        Fi-NeMo hit calls with columns:
        peak_id, start_untrimmed, end_untrimmed, strand, motif_name.
    seqlets_df : pl.DataFrame or pl.LazyFrame
        TF-MoDISCo seqlets with columns:
        chr_id, start_untrimmed, end_untrimmed, motif_name.
    peaks_df : pl.DataFrame
        Peak metadata used to map peak_id -> chr_id.
    motif_names : list of str
        Motif names defining the confusion-matrix dimensions.
    motif_width : int
        Bin size used for coordinate binning.

    Returns
    -------
    confusion_df : pl.DataFrame
        Long-format confusion table with columns motif_name_seqlets,
        motif_name_hits, and frac_overlap (overlaps / seqlets per motif).
    confusion_mat : np.ndarray, shape (M, M)
        Entry (i, j) is the fraction of motif-i seqlets overlapping
        motif-j hits; combinations absent from the data remain zero.

    Notes
    -----
    - NOTE(review): a motif name present in the data but missing from
      ``motif_names`` makes ``replace_strict`` raise a polars error —
      confirm upstream filtering guarantees this cannot happen.
    """
    bin_size = motif_width

    # Normalize to LazyFrame so all joins/aggregations build one query plan.
    if isinstance(hits_df, pl.DataFrame):
        hits_df = hits_df.lazy()

    hits_binned = (
        hits_df.with_columns(
            peak_id=pl.col("peak_id").cast(pl.UInt32),
            is_revcomp=pl.col("strand") == "-",
        )
        .join(peaks_df.lazy(), on="peak_id", how="inner")
        # Deduplicate hits that share a start position and orientation.
        .unique(subset=["chr_id", "start_untrimmed", "motif_name", "is_revcomp"])
        .select(
            chr_id=pl.col("chr_id"),
            start_bin=pl.col("start_untrimmed") // bin_size,
            end_bin=pl.col("end_untrimmed") // bin_size,
            motif_name=pl.col("motif_name"),
        )
    )

    seqlets_binned = seqlets_df.lazy().select(
        chr_id=pl.col("chr_id"),
        start_bin=pl.col("start_untrimmed") // bin_size,
        end_bin=pl.col("end_untrimmed") // bin_size,
        motif_name=pl.col("motif_name"),
    )

    # A seqlet and a hit "overlap" when they fall in identical bins.
    overlaps_df = seqlets_binned.join(
        hits_binned, on=["chr_id", "start_bin", "end_bin"], how="inner", suffix="_hits"
    )

    seqlet_counts = (
        seqlets_binned.group_by("motif_name").len(name="num_seqlets").collect()
    )
    overlap_counts = (
        overlaps_df.group_by(["motif_name", "motif_name_hits"])
        .len(name="num_overlaps")
        .collect()
    )

    num_motifs = len(motif_names)
    confusion_mat = np.zeros((num_motifs, num_motifs), dtype=np.float32)
    name_to_idx = {m: i for i, m in enumerate(motif_names)}

    confusion_df = overlap_counts.join(
        seqlet_counts, on="motif_name", how="inner"
    ).select(
        motif_name_seqlets=pl.col("motif_name"),
        motif_name_hits=pl.col("motif_name_hits"),
        frac_overlap=pl.col("num_overlaps") / pl.col("num_seqlets"),
    )

    # Map motif names to matrix indices and scatter fractions into the matrix.
    confusion_idx_df = confusion_df.select(
        row_idx=pl.col("motif_name_seqlets").replace_strict(name_to_idx),
        col_idx=pl.col("motif_name_hits").replace_strict(name_to_idx),
        frac_overlap=pl.col("frac_overlap"),
    )

    row_idx = confusion_idx_df["row_idx"].to_numpy()
    col_idx = confusion_idx_df["col_idx"].to_numpy()
    confusion_mat[row_idx, col_idx] = confusion_idx_df["frac_overlap"].to_numpy()

    return confusion_df, confusion_mat
+ +The main algorithm fits a sparse linear model where contribution scores are +reconstructed as a weighted combination of motif contribution weight matrices (CWMs) +at specific genomic positions. The sparsity constraint ensures that only the most +significant motif instances are called. +""" + import warnings +from typing import Tuple, Union, Optional, Dict, List +from abc import ABC, abstractmethod import numpy as np +from numpy import ndarray import torch import torch.nn.functional as F +from torch import Tensor import polars as pl +from jaxtyping import Float, Int, Bool from tqdm import tqdm - -def prox_grad_step(coefficients, importance_scale, cwms, contribs, sequences, - lambdas, step_sizes): - """ - Proximal gradient descent optimization step for non-negative lasso - - coefficients: (b, m, l - w + 1) - importance_scale: (b, 1, l - w + 1) - cwms: (m, 4, w) - contribs: (b, 4, l) - sequences: (b, 4, l) or dummy scalar - lambdas: (1, m, 1) - - For details on proximal gradient descent: https://yuxinchen2020.github.io/ele520_math_data/lectures/lasso_algorithm_extension.pdf, slide 22 - For details on duality gap computation: https://stanford.edu/~boyd/papers/pdf/l1_ls.pdf, Section III +# Type aliases for tensor operations +ArrayLike = Union[ndarray, torch.Tensor] + + +def prox_grad_step( + coefficients: Float[Tensor, "B M P"], + importance_scale: Float[Tensor, "B 1 P"], + cwms: Float[Tensor, "M 4 W"], + contribs: Float[Tensor, "B 4 L"], + sequences: Union[Int[Tensor, "B 4 L"], int], + lambdas: Float[Tensor, "1 M 1"], + step_sizes: Float[Tensor, "B 1 1"], +) -> Tuple[Float[Tensor, "B M P"], Float[Tensor, " B"], Float[Tensor, " B"]]: + """Perform a proximal gradient descent optimization step for non-negative lasso. + + This function implements a single optimization step of the Fi-NeMo algorithm, + which uses proximal gradient descent to solve a sparse reconstruction problem. 
+ The goal is to represent contribution scores as a sparse linear combination + of motif contribution weight matrices (CWMs). + + B = batch size, M = number of motifs, L = sequence length, W = motif width. + P = L - W + 1 (the number of positions with coefficients). + + Parameters + ---------- + coefficients : Float[Tensor, "B M P"] + Current coefficient matrix representing motif instance strengths. + importance_scale : Float[Tensor, "B 1 P"] + Scaling factors for importance-weighted reconstruction. + cwms : Float[Tensor, "M 4 W"] + Motif contribution weight matrices for all motifs. + 4 represents the DNA bases (A, C, G, T). + contribs : Float[Tensor, "B 4 L"] + Target contribution scores to reconstruct. + sequences : Float[Tensor, "B 4 L"] | int + One-hot encoded DNA sequences. Can be a scalar (1) for hypothetical mode. + lambdas : Float[Tensor, "1 M 1"] + L1 regularization weights for each motif. + step_sizes : Float[Tensor, "B 1 1"] + Optimization step sizes for each batch element. + + Returns + ------- + c_next : Float[Tensor, "B M P"], shape (b, m, l - w + 1) + Updated coefficient matrix after the optimization step. + dual_gap : Float[Tensor, "B"] + Duality gap for convergence assessment. + nll : Float[Tensor, "B"] + Negative log likelihood (proportional to MSE). 
+ + Notes + ----- + The algorithm uses proximal gradient descent to solve: + + minimize_c: ||contribs - conv_transpose(c * importance_scale, cwms) * sequences||²₂ + λ||c||₁ + + subject to: c ≥ 0 + + References + ---------- + - Proximal gradient descent: https://yuxinchen2020.github.io/ele520_math_data/lectures/lasso_algorithm_extension.pdf, slide 22 + - Duality gap computation: https://stanford.edu/~boyd/papers/pdf/l1_ls.pdf, Section III """ - # Forward pass + # Forward pass: convolution operations require specific tensor layouts coef_adj = coefficients * importance_scale - pred_unmasked = F.conv_transpose1d(coef_adj, cwms) # (b, 4, l) - pred = pred_unmasked * sequences # (b, 4, l) + pred_unmasked = F.conv_transpose1d(coef_adj, cwms) # (b, 4, l) + pred = ( + pred_unmasked * sequences + ) # (b, 4, l), element-wise masking for projected mode # Compute gradient * -1 - residuals = contribs - pred # (b, 4, l) - ngrad = F.conv1d(residuals, cwms) * importance_scale # (b, m, l - w + 1) + residuals = contribs - pred # (b, 4, l) + ngrad = F.conv1d(residuals, cwms) * importance_scale # (b, m, l - w + 1) # Negative log likelihood (proportional to MSE) - nll = (residuals**2).sum(dim=(1,2)) # (b) - - # Compute duality gap - dual_norm = (ngrad / lambdas).amax(dim=(1,2)) # (b) - dual_scale = (torch.clamp(1 / dual_norm, max=1.)**2 + 1) / 2 # (b) - nll_scaled = nll * dual_scale # (b) - - dual_diff = (residuals * contribs).sum(dim=(1,2)) # (b) - l1_term = (torch.abs(coefficients).sum(dim=2, keepdim=True) * lambdas).sum(dim=(1,2)) # (b) - # l1_term = torch.linalg.vector_norm((coefficients * lambdas), ord=1, dim=(1,2)) # (b) - dual_gap = (nll_scaled - dual_diff + l1_term).abs() # (b) + nll = (residuals**2).sum(dim=(1, 2)) # (b) - # Compute proximal gradient descent step - c_next = coefficients + step_sizes * (ngrad - lambdas) # (b, m, l - w + 1) - c_next = F.relu(c_next) # (b, m, l - w + 1) + # Compute duality gap for convergence assessment + dual_norm = (ngrad / lambdas).amax(dim=(1, 
2)) # (b) + dual_scale = (torch.clamp(1 / dual_norm, max=1.0) ** 2 + 1) / 2 # (b) + nll_scaled = nll * dual_scale # (b) - return c_next, dual_gap, nll + dual_diff = (residuals * contribs).sum(dim=(1, 2)) # (b) + l1_term = (torch.abs(coefficients).sum(dim=2, keepdim=True) * lambdas).sum( + dim=(1, 2) + ) # (b) + dual_gap = (nll_scaled - dual_diff + l1_term).abs() # (b) + # Compute proximal gradient descent step + c_next = coefficients + step_sizes * (ngrad - lambdas) # (b, m, l - w + 1) + c_next = F.relu(c_next) # Ensure non-negativity constraint -def optimizer_step(cwms, contribs, importance_scale, sequences, coef_inter, coef, i, step_sizes, l, lambdas): - """ - Non-negative lasso optimizer step with momentum. + return c_next, dual_gap, nll - cwms: (m, 4, w) - contribs: (b, 4, l) - importance_scale: (b, 1, l - w + 1) - sequences: (b, 4, l) or dummy scalar - coef_inter, coef: (b, m, l - w + 1) - i, step_sizes: (b,) - For details on optimization algorithm: https://yuxinchen2020.github.io/ele520_math_data/lectures/lasso_algorithm_extension.pdf, slides 22, 27 +def optimizer_step( + cwms: Float[Tensor, "M 4 W"], + contribs: Float[Tensor, "B 4 L"], + importance_scale: Float[Tensor, "B 1 P"], + sequences: Union[Int[Tensor, "B 4 L"], int], + coef_inter: Float[Tensor, "B M P"], + coef: Float[Tensor, "B M P"], + i: Float[Tensor, "B 1 1"], + step_sizes: Float[Tensor, "B 1 1"], + sequence_length: int, + lambdas: Float[Tensor, "1 M 1"], +) -> Tuple[ + Float[Tensor, "B M P"], + Float[Tensor, "B M P"], + Float[Tensor, " B"], + Float[Tensor, " B"], +]: + """Perform a non-negative lasso optimizer step with Nesterov momentum. + + This function combines proximal gradient descent with momentum acceleration + to improve convergence speed while maintaining the non-negative constraint + on coefficients. + + B = batch size, M = number of motifs, L = sequence length, W = motif width. + P = L - W + 1 (the number of positions with coefficients). 
+ + Parameters + ---------- + cwms : Float[Tensor, "M 4 W"] + Motif contribution weight matrices. + contribs : Float[Tensor, "B 4 L"] + Target contribution scores. + importance_scale : Float[Tensor, "B 1 P"] + Importance scaling factors. + sequences : Union[Int[Tensor, "B 4 L"], int] + One-hot encoded sequences or scalar for hypothetical mode. + coef_inter : Float[Tensor, "B M P"] + Intermediate coefficient matrix (with momentum). + coef : Float[Tensor, "B M P"] + Current coefficient matrix. + i : Float[Tensor, "B 1 1"] + Iteration counter for each batch element. + step_sizes : Float[Tensor, "B 1 1"] + Step sizes for optimization. + sequence_length : int + Sequence length for normalization. + lambdas : Float[Tensor, "1 M 1"] + Regularization parameters. + + Returns + ------- + coef_inter : Float[Tensor, "B M P"] + Updated intermediate coefficients with momentum. + coef : Float[Tensor, "B M P"] + Updated coefficient matrix. + gap : Float[Tensor, " B"] + Normalized duality gap. + nll : Float[Tensor, " B"] + Normalized negative log likelihood. + + Notes + ----- + Uses Nesterov momentum with momentum coefficient i/(i+3) for improved + convergence properties. The duality gap and NLL are normalized by + sequence length for scale-invariant convergence assessment. + + References + ---------- + https://yuxinchen2020.github.io/ele520_math_data/lectures/lasso_algorithm_extension.pdf, slides 22, 27 """ coef_prev = coef # Proximal gradient descent step - coef, gap, nll = prox_grad_step(coef_inter, importance_scale, cwms, contribs, sequences, - lambdas, step_sizes) - gap = gap / l - nll = nll / (2 * l) - - # Compute updated coefficients - mom_term = i / (i + 3.) 
+ coef, gap, nll = prox_grad_step( + coef_inter, importance_scale, cwms, contribs, sequences, lambdas, step_sizes + ) + gap = gap / sequence_length + nll = nll / (2 * sequence_length) + + # Compute updated coefficients with Nesterov momentum + mom_term = i / (i + 3.0) coef_inter = (1 + mom_term) * coef - mom_term * coef_prev return coef_inter, coef, gap, nll -def _to_channel_last_layout(tensor, **kwargs): - return tensor[:,:,:,None].to(memory_format=torch.channels_last, **kwargs).squeeze(3) +def _to_channel_last_layout(tensor: Tensor, **kwargs) -> torch.Tensor: + """Convert tensor to channel-last memory layout for optimized convolution operations. + + Parameters + ---------- + tensor : torch.Tensor + Input tensor to convert. + **kwargs + Additional keyword arguments passed to tensor.to(). + + Returns + ------- + torch.Tensor + Tensor with channel-last memory layout. + """ + return ( + tensor[:, :, :, None].to(memory_format=torch.channels_last, **kwargs).squeeze(3) + ) + + +def _signed_sqrt(x: torch.Tensor) -> torch.Tensor: + """Apply signed square root transformation to input tensor. + This transformation preserves the sign while applying square root to the + absolute value, which can help with numerical stability and gradient flow. -def _signed_sqrt(x): + Parameters + ---------- + x : torch.Tensor + Input tensor. + + Returns + ------- + torch.Tensor + Transformed tensor with same shape as input. + """ return torch.sign(x) * torch.sqrt(torch.abs(x)) -class BatchLoaderBase: - def __init__(self, contribs, sequences, l, device): +class BatchLoaderBase(ABC): + """Base class for loading batches of contribution scores and sequences. + + This class provides common functionality for different input formats + including batch indexing and padding for consistent batch sizes. + + N = number of sequences, L = sequence length. + + Parameters + ---------- + contribs : Union[Float[Tensor, "N 4 L"], Float[Tensor, "N L"]] + Contribution scores array. 
+ sequences : Int[Tensor, "N 4 L"] + One-hot encoded sequences array. + sequence_length : int + Sequence length. + device : torch.device + Target device for tensor operations. + """ + + def __init__( + self, + contribs: Union[Float[Tensor, "N 4 L"], Float[Tensor, "N L"]], + sequences: Int[Tensor, "N 4 L"], + sequence_length: int, + device: torch.device, + ) -> None: self.contribs = contribs self.sequences = sequences - self.l = l + self.sequence_length = sequence_length self.device = device - def _get_inds_and_pad_lens(self, start, end): + def _get_inds_and_pad_lens( + self, start: int, end: int + ) -> Tuple[Int[Tensor, " Z"], Tuple[int, ...]]: + """Get indices and padding lengths for batch loading. + + Parameters + ---------- + start : int + Start index for batch. + end : int + End index for batch. + + Returns + ------- + inds : Int[Tensor, " Z"] + Padded indices tensor with -1 for padding positions. + pad_lens : tuple + Padding specification for F.pad (left, right, top, bottom, front, back). + """ n = end - start end = min(end, self.contribs.shape[0]) overhang = n - (end - start) pad_lens = (0, 0, 0, 0, 0, overhang) - inds = F.pad(torch.arange(start, end, dtype=torch.int), (0, overhang), value=-1).to(device=self.device) + inds = F.pad( + torch.arange(start, end, dtype=torch.int), (0, overhang), value=-1 + ).to(device=self.device) return inds, pad_lens - def load_batch(self, start, end): - raise NotImplementedError - + @abstractmethod + def load_batch( + self, start: int, end: int + ) -> Tuple[ + Float[Tensor, "B 4 L"], Union[Int[Tensor, "B 4 L"], int], Int[Tensor, " B"] + ]: + """Load a batch of data. + + B = batch size, L = sequence length. + + Parameters + ---------- + start : int + Start index (used by subclasses). + end : int + End index (used by subclasses). + + Returns + ------- + contribs_batch : Float[Tensor, "B 4 L"] + Batch of contribution scores. + sequences_batch : Union[Int[Tensor, "B 4 L"], int] + Batch of sequences or scalar for hypothetical mode. 
+ inds_batch : Int[Tensor, "B"] + Batch indices. + + Notes + ----- + This is an abstract method that must be implemented by subclasses. + Parameters are intentionally unused in the base implementation. + """ + pass + class BatchLoaderCompactFmt(BatchLoaderBase): - def load_batch(self, start, end): - inds, pad_lens = self._get_inds_and_pad_lens(start, end) + """Batch loader for compact format contribution scores. - contribs_compact = F.pad(self.contribs[start:end,None,:], pad_lens) - contribs_batch = _to_channel_last_layout(contribs_compact, device=self.device, dtype=torch.float32) - sequences_batch = F.pad(self.sequences[start:end,:,:], pad_lens) # (b, 4, l) - sequences_batch = _to_channel_last_layout(sequences_batch, device=self.device, dtype=torch.int8) + Handles contribution scores in shape (N, L) representing projected + scores that need to be broadcasted to (N, 4, L) format. + """ + + def load_batch( + self, start: int, end: int + ) -> Tuple[Float[Tensor, "B 4 L"], Int[Tensor, "B 4 L"], Int[Tensor, " B"]]: + inds, pad_lens = self._get_inds_and_pad_lens(start, end) - # global_scale = ((contribs_batch**2).sum(dim=(1,2)) / self.l).sqrt() + contribs_compact = F.pad(self.contribs[start:end, None, :], pad_lens) + contribs_batch = _to_channel_last_layout( + contribs_compact, device=self.device, dtype=torch.float32 + ) + sequences_batch = F.pad(self.sequences[start:end, :, :], pad_lens) # (b, 4, l) + sequences_batch = _to_channel_last_layout( + sequences_batch, device=self.device, dtype=torch.int8 + ) - contribs_batch = contribs_batch * sequences_batch # (b, 4, l) + contribs_batch = contribs_batch * sequences_batch # (b, 4, l) return contribs_batch, sequences_batch, inds class BatchLoaderProj(BatchLoaderBase): - def load_batch(self, start, end): + """Batch loader for projected contribution scores. + + Handles contribution scores in shape (N, 4, L) where scores are + element-wise multiplied by one-hot sequences to get projected contributions. 
+ """ + + def load_batch( + self, start: int, end: int + ) -> Tuple[Float[Tensor, "B 4 L"], Int[Tensor, "B 4 L"], Int[Tensor, " B"]]: inds, pad_lens = self._get_inds_and_pad_lens(start, end) - contribs_hyp = F.pad(self.contribs[start:end,:,:], pad_lens) - contribs_hyp = _to_channel_last_layout(contribs_hyp, device=self.device, dtype=torch.float32) - sequences_batch = F.pad(self.sequences[start:end,:,:], pad_lens) # (b, 4, l) - sequences_batch = _to_channel_last_layout(sequences_batch, device=self.device, dtype=torch.int8) + contribs_hyp = F.pad(self.contribs[start:end, :, :], pad_lens) + contribs_hyp = _to_channel_last_layout( + contribs_hyp, device=self.device, dtype=torch.float32 + ) + sequences_batch = F.pad(self.sequences[start:end, :, :], pad_lens) # (b, 4, l) + sequences_batch = _to_channel_last_layout( + sequences_batch, device=self.device, dtype=torch.int8 + ) contribs_batch = contribs_hyp * sequences_batch - # global_scale = ((contribs_batch**2).sum(dim=(1,2)) / self.l).sqrt() - # contribs_batch = torch.nan_to_num(contribs_batch / global_scale[:,None,None]) - return contribs_batch, sequences_batch, inds - + class BatchLoaderHyp(BatchLoaderBase): - def load_batch(self, start, end): - inds, pad_lens = self._get_inds_and_pad_lens(start, end) + """Batch loader for hypothetical contribution scores. - contribs_batch = F.pad(self.contribs[start:end,:,:], pad_lens) - contribs_batch = _to_channel_last_layout(contribs_batch, device=self.device, dtype=torch.float32) + Handles hypothetical contribution scores in shape (N, 4, L) where + scores represent counterfactual effects of base substitutions. 
+ """ + + def load_batch( + self, start: int, end: int + ) -> Tuple[Float[Tensor, "B 4 L"], int, Int[Tensor, " B"]]: + inds, pad_lens = self._get_inds_and_pad_lens(start, end) - # global_scale = ((contribs_batch**2).sum(dim=(1,2)) / self.l).sqrt() - # contribs_batch = torch.nan_to_num(contribs_batch / global_scale[:,None,None]) + contribs_batch = F.pad(self.contribs[start:end, :, :], pad_lens) + contribs_batch = _to_channel_last_layout( + contribs_batch, device=self.device, dtype=torch.float32 + ) return contribs_batch, 1, inds -def fit_contribs(cwms, contribs, sequences, cwm_trim_mask, use_hypothetical, lambdas, step_size_max, step_size_min, sqrt_transform, - convergence_tol, max_steps, batch_size, step_adjust, post_filter, device, compile_optimizer, eps=1.): - """ - Call hits by fitting sparse linear model to contributions - - cwms: (m, 4, w) - contribs: (n, 4, l) or (n, l) - sequences: (n, 4, l) - cwm_trim_mask: (m, w) +def fit_contribs( + cwms: Float[ndarray, "M 4 W"], + contribs: Union[Float[ndarray, "N 4 L"], Float[ndarray, "N L"]], + sequences: Int[ndarray, "N 4 L"], + cwm_trim_mask: Float[ndarray, "M W"], + use_hypothetical: bool, + lambdas: Float[ndarray, " M"], + step_size_max: float, + step_size_min: float, + sqrt_transform: bool, + convergence_tol: float, + max_steps: int, + batch_size: int, + step_adjust: float, + post_filter: bool, + device: Optional[torch.device], + compile_optimizer: bool, + eps: float = 1.0, +) -> Tuple[pl.DataFrame, pl.DataFrame]: + """Call motif hits by fitting sparse linear model to contribution scores. + + This is the main function implementing the Fi-NeMo algorithm. It identifies + motif instances by solving a sparse reconstruction problem where contribution + scores are approximated as a linear combination of motif CWMs at specific + positions. The optimization uses proximal gradient descent with momentum. 
+ + Parameters + ---------- + cwms : Float[ndarray, "M 4 W"] + Motif contribution weight matrices where M = number of motifs, + 4 = DNA bases (A, C, G, T), W = motif width. + contribs : Float[ndarray, "N 4 L"] | Float[ndarray, "N L"] + Neural network contribution scores where N = number of regions, + L = sequence length. Can be hypothetical (N, 4, L) or projected (N, L). + sequences : Float[ndarray, "N 4 L"] + One-hot encoded DNA sequences. + cwm_trim_mask : Float[ndarray, "M W"] + Binary mask indicating which positions of each CWM to use. + use_hypothetical : bool + Whether to use hypothetical contribution scores (True) or + projected scores (False). + lambdas : Float[ndarray, "M"] + L1 regularization weights for each motif. + step_size_max : float + Maximum optimization step size. + step_size_min : float + Minimum optimization step size (for convergence failure detection). + sqrt_transform : bool + Whether to apply signed square root transformation to inputs. + convergence_tol : float + Convergence tolerance based on duality gap. + max_steps : int + Maximum number of optimization steps. + batch_size : int + Number of regions to process simultaneously. + step_adjust : float + Factor to reduce step size when optimization diverges. + post_filter : bool + Whether to filter hits based on similarity threshold. + device : torch.device, optional + Target device for computation. Auto-detected if None. + compile_optimizer : bool + Whether to JIT compile the optimizer for speed. + eps : float, default 1.0 + Small constant for numerical stability. 
+ + Returns + ------- + hits_df : pl.DataFrame + DataFrame containing called motif hits with columns: + - peak_id: Region index + - motif_id: Motif index + - hit_start: Start position of hit + - hit_coefficient: Hit strength coefficient + - hit_similarity: Cosine similarity with motif + - hit_importance: Total contribution score in hit region + - hit_importance_sq: Sum of squared contributions (for normalization) + qc_df : pl.DataFrame + DataFrame containing quality control metrics with columns: + - peak_id: Region index + - nll: Final negative log likelihood + - dual_gap: Final duality gap + - num_steps: Number of optimization steps + - step_size: Final step size + - global_scale: Region-level scaling factor + + Notes + ----- + The algorithm solves the optimization problem: + + minimize_c: ||contribs - Σⱼ convolve(c * scale, cwms[j]) * sequences||²₂ + Σⱼ λⱼ||c[:,j]||₁ + + subject to: c ≥ 0 + + where c[i,j] represents the strength of motif j at position i. + + The importance scaling balances reconstruction across different + motifs and positions based on the local contribution magnitude. + + Examples + -------- + >>> hits_df, qc_df = fit_contribs( + ... cwms=motif_cwms, + ... contribs=contrib_scores, + ... sequences=onehot_seqs, + ... cwm_trim_mask=trim_masks, + ... use_hypothetical=False, + ... lambdas=np.array([0.7, 0.8]), + ... step_size_max=3.0, + ... step_size_min=0.08, + ... sqrt_transform=False, + ... convergence_tol=0.0005, + ... max_steps=10000, + ... batch_size=1000, + ... step_adjust=0.7, + ... post_filter=True, + ... device=None, + ... compile_optimizer=False + ... ) """ m, _, w = cwms.shape - n, _, l = sequences.shape + n, _, sequence_length = sequences.shape b = batch_size @@ -175,74 +542,113 @@ def fit_contribs(cwms, contribs, sequences, cwm_trim_mask, use_hypothetical, lam else: device = torch.device("cpu") warnings.warn("No GPU available. 
Running on CPU.", RuntimeWarning) - + # Compile optimizer if requested global optimizer_step if compile_optimizer: optimizer_step = torch.compile(optimizer_step, fullgraph=True) - # Convert inputs to pytorch tensors - cwms = torch.from_numpy(cwms) - contribs = torch.from_numpy(contribs) - sequences = torch.from_numpy(sequences) - cwm_trim_mask = torch.from_numpy(cwm_trim_mask)[:,None,:].repeat(1,4,1) - lambdas = torch.from_numpy(lambdas)[None,:,None].to(device=device, dtype=torch.float32) - - cwms = _to_channel_last_layout(cwms, device=device, dtype=torch.float32) - cwm_trim_mask = _to_channel_last_layout(cwm_trim_mask, device=device, dtype=torch.float32) - cwms = cwms * cwm_trim_mask + # Convert inputs to PyTorch tensors with proper device placement + cwms_tensor: torch.Tensor = torch.from_numpy(cwms) + contribs_tensor: torch.Tensor = torch.from_numpy(contribs) + sequences_tensor: torch.Tensor = torch.from_numpy(sequences) + cwm_trim_mask_tensor = torch.from_numpy(cwm_trim_mask)[:, None, :].repeat(1, 4, 1) + lambdas_tensor: torch.Tensor = torch.from_numpy(lambdas)[None, :, None].to( + device=device, dtype=torch.float32 + ) + + # Convert to channel-last layout for optimized convolution operations + cwms_tensor = _to_channel_last_layout( + cwms_tensor, device=device, dtype=torch.float32 + ) + cwm_trim_mask_tensor = _to_channel_last_layout( + cwm_trim_mask_tensor, device=device, dtype=torch.float32 + ) + cwms_tensor = cwms_tensor * cwm_trim_mask_tensor # Apply trimming mask if sqrt_transform: - cwms = _signed_sqrt(cwms) - cwm_norm = (cwms**2).sum(dim=(1,2)).sqrt() - cwms = cwms / cwm_norm[:,None,None] + cwms_tensor = _signed_sqrt(cwms_tensor) + cwm_norm = (cwms_tensor**2).sum(dim=(1, 2)).sqrt() + cwms_tensor = cwms_tensor / cwm_norm[:, None, None] # Initialize batch loader - if len(contribs.shape) == 3: + if len(contribs_tensor.shape) == 3: if use_hypothetical: - batch_loader = BatchLoaderHyp(contribs, sequences, l, device) + batch_loader = BatchLoaderHyp( + 
contribs_tensor, sequences_tensor, sequence_length, device + ) else: - batch_loader = BatchLoaderProj(contribs, sequences, l, device) - elif len(contribs.shape) == 2: + batch_loader = BatchLoaderProj( + contribs_tensor, sequences_tensor, sequence_length, device + ) + elif len(contribs_tensor.shape) == 2: if use_hypothetical: - raise ValueError("Input regions do not contain hypothetical contribution scores") + raise ValueError( + "Input regions do not contain hypothetical contribution scores" + ) else: - batch_loader = BatchLoaderCompactFmt(contribs, sequences, l, device) + batch_loader = BatchLoaderCompactFmt( + contribs_tensor, sequences_tensor, sequence_length, device + ) else: - raise ValueError(f"Input contributions array is of incorrect shape {contribs.shape}") - - # Intialize output container objects - hit_idxs_lst = [] - coefficients_lst = [] - similarity_lst = [] - importance_lst = [] - importance_sq_lst = [] - qc_lsts = {"nll": [], "dual_gap": [], "num_steps": [], "step_size": [], "global_scale": [], "peak_id": []} + raise ValueError( + f"Input contributions array is of incorrect shape {contribs_tensor.shape}" + ) + + # Initialize output container objects + hit_idxs_lst: List[ndarray] = [] + coefficients_lst: List[ndarray] = [] + similarity_lst: List[ndarray] = [] + importance_lst: List[ndarray] = [] + importance_sq_lst: List[ndarray] = [] + qc_lsts: Dict[str, List[ndarray]] = { + "nll": [], + "dual_gap": [], + "num_steps": [], + "step_size": [], + "global_scale": [], + "peak_id": [], + } # Initialize buffers for optimizer - coef_inter = torch.zeros((b, m, l - w + 1)) # (b, m, l - w + 1) + coef_inter: Float[Tensor, "B M P"] = torch.zeros( + (b, m, sequence_length - w + 1) + ) # (b, m, sequence_length - w + 1) coef_inter = _to_channel_last_layout(coef_inter, device=device, dtype=torch.float32) - coef = torch.zeros_like(coef_inter) - i = torch.zeros((b, 1, 1), dtype=torch.int, device=device) - step_sizes = torch.full((b, 1, 1), step_size_max, 
dtype=torch.float32, device=device) - - converged = torch.full((b,), True, dtype=torch.bool, device=device) + coef: Float[Tensor, "B M P"] = torch.zeros_like(coef_inter) + i: Float[Tensor, "B 1 1"] = torch.zeros((b, 1, 1), dtype=torch.int, device=device) + step_sizes: Float[Tensor, "B 1 1"] = torch.full( + (b, 1, 1), step_size_max, dtype=torch.float32, device=device + ) + + converged: Bool[Tensor, " B"] = torch.full( + (b,), True, dtype=torch.bool, device=device + ) num_load = b - contribs_buf = torch.zeros((b, 4, l)) - contribs_buf = _to_channel_last_layout(contribs_buf, device=device, dtype=torch.float32) + contribs_buf: Float[Tensor, "B 4 L"] = torch.zeros((b, 4, sequence_length)) + contribs_buf = _to_channel_last_layout( + contribs_buf, device=device, dtype=torch.float32 + ) + seqs_buf: Union[Int[Tensor, "B 4 L"], int] if use_hypothetical: seqs_buf = 1 else: - seqs_buf = torch.zeros((b, 4, l)) + seqs_buf = torch.zeros((b, 4, sequence_length)) seqs_buf = _to_channel_last_layout(seqs_buf, device=device, dtype=torch.int8) - importance_scale_buf = torch.zeros((b, m, l - w + 1)) - importance_scale_buf = _to_channel_last_layout(importance_scale_buf, device=device, dtype=torch.float32) + importance_scale_buf: Float[Tensor, "B M P"] = torch.zeros( + (b, m, sequence_length - w + 1) + ) + importance_scale_buf = _to_channel_last_layout( + importance_scale_buf, device=device, dtype=torch.float32 + ) - inds_buf = torch.zeros((b,), dtype=torch.int, device=device) - global_scale_buf = torch.zeros((b,), dtype=torch.float, device=device) + inds_buf: Int[Tensor, " B"] = torch.zeros((b,), dtype=torch.int, device=device) + global_scale_buf: Float[Tensor, " B"] = torch.zeros( + (b,), dtype=torch.float, device=device + ) with tqdm(disable=None, unit="regions", total=n, ncols=120) as pbar: num_complete = 0 @@ -252,61 +658,80 @@ def fit_contribs(cwms, contribs, sequences, cwm_trim_mask, use_hypothetical, lam if num_load > 0: load_start = next_ind load_end = load_start + num_load - 
next_ind = min(load_end, contribs.shape[0]) + next_ind = min(load_end, contribs_tensor.shape[0]) - batch_data = batch_loader.load_batch(load_start, load_end) + batch_data = batch_loader.load_batch(int(load_start), int(load_end)) contribs_batch, seqs_batch, inds_batch = batch_data if sqrt_transform: contribs_batch = _signed_sqrt(contribs_batch) - - global_scale_batch = ((contribs_batch**2).sum(dim=(1,2)) / l).sqrt() - contribs_batch = torch.nan_to_num(contribs_batch / global_scale_batch[:,None,None]) - importance_scale_batch = (F.conv1d(contribs_batch**2, cwm_trim_mask) + eps)**(-0.5) + global_scale_batch = ( + (contribs_batch**2).sum(dim=(1, 2)) / sequence_length + ).sqrt() + contribs_batch = torch.nan_to_num( + contribs_batch / global_scale_batch[:, None, None] + ) + + importance_scale_batch = ( + F.conv1d(contribs_batch**2, cwm_trim_mask_tensor) + eps + ) ** (-0.5) importance_scale_batch = importance_scale_batch.clamp(max=10) - contribs_buf[converged,:,:] = contribs_batch + contribs_buf[converged, :, :] = contribs_batch if not use_hypothetical: - seqs_buf[converged,:,:] = seqs_batch + seqs_buf[converged, :, :] = seqs_batch # type: ignore + + importance_scale_buf[converged, :, :] = importance_scale_batch - importance_scale_buf[converged,:,:] = importance_scale_batch - inds_buf[converged] = inds_batch global_scale_buf[converged] = global_scale_batch - coef_inter[converged,:,:] *= 0 - coef[converged,:,:] *= 0 + coef_inter[converged, :, :] *= 0 + coef[converged, :, :] *= 0 i[converged] *= 0 step_sizes[converged] = step_size_max # Optimization step - coef_inter, coef, gap, nll = optimizer_step(cwms, contribs_buf, importance_scale_buf, seqs_buf, coef_inter, coef, - i, step_sizes, l, lambdas) + coef_inter, coef, gap, nll = optimizer_step( + cwms_tensor, + contribs_buf, + importance_scale_buf, + seqs_buf, + coef_inter, + coef, + i, + step_sizes, + sequence_length, + lambdas_tensor, + ) i += 1 # Assess convergence of each peak being optimized. 
Reset diverged peaks with lower step size. active = inds_buf >= 0 diverged = ~torch.isfinite(gap) & active - coef_inter[diverged,:,:] *= 0 - coef[diverged,:,:] *= 0 + coef_inter[diverged, :, :] *= 0 + coef[diverged, :, :] *= 0 i[diverged] *= 0 - step_sizes[diverged,:,:] *= step_adjust + step_sizes[diverged, :, :] *= step_adjust timeouts = (i > max_steps).squeeze() & active if timeouts.sum().item() > 0: timeout_inds = inds_buf[timeouts] for ind in timeout_inds: - warnings.warn(f"Region {ind} has not converged within max_steps={max_steps} iterations.", RuntimeWarning) + warnings.warn( + f"Region {ind} has not converged within max_steps={max_steps} iterations.", + RuntimeWarning, + ) fails = (step_sizes < step_size_min).squeeze() & active if fails.sum().item() > 0: fail_inds = inds_buf[fails] for ind in fail_inds: warnings.warn(f"Optimizer failed for region {ind}.", RuntimeWarning) - + converged = ((gap <= convergence_tol) | timeouts | fails) & active num_load = converged.sum().item() @@ -315,27 +740,32 @@ def fit_contribs(cwms, contribs, sequences, cwm_trim_mask, use_hypothetical, lam inds_out = inds_buf[converged] global_scale_out = global_scale_buf[converged] - # Compute hit scores - coef_out = coef[converged,:,:] - importance_scale_out_dense = importance_scale_buf[converged,:,:] - importance_sq = importance_scale_out_dense**(-2) - eps + # Compute hit scores + coef_out = coef[converged, :, :] + importance_scale_out_dense = importance_scale_buf[converged, :, :] + importance_sq = importance_scale_out_dense ** (-2) - eps xcor_scale = importance_sq.sqrt() - contribs_converged = contribs_buf[converged,:,:] - importance_sum_out_dense = F.conv1d(torch.abs(contribs_converged), cwm_trim_mask) - xcov_out_dense = F.conv1d(contribs_converged, cwms) - # xcov_out_dense = F.conv1d(torch.abs(contribs_converged), cwms) + contribs_converged = contribs_buf[converged, :, :] + importance_sum_out_dense = F.conv1d( + torch.abs(contribs_converged), cwm_trim_mask_tensor + ) + 
xcov_out_dense = F.conv1d(contribs_converged, cwms_tensor) + # xcov_out_dense = F.conv1d(torch.abs(contribs_converged), cwms_tensor) xcor_out_dense = xcov_out_dense / xcor_scale if post_filter: - coef_out = coef_out * (xcor_out_dense >= lambdas) + coef_out = coef_out * (xcor_out_dense >= lambdas_tensor) - # Extract hit coordinates + # Extract hit coordinates using sparse tensor representation coef_out = coef_out.to_sparse() - hit_idxs_out = torch.clone(coef_out.indices()) - hit_idxs_out[0,:] = F.embedding(hit_idxs_out[0,:], inds_out[:,None]).squeeze() - # Map buffer index to peak index + # Tensor indexing operations for hit extraction + hit_idxs_out = torch.clone(coef_out.indices()) # Sparse tensor indices + hit_idxs_out[0, :] = F.embedding( + hit_idxs_out[0, :], inds_out[:, None] + ).squeeze() # Embedding lookup with complex indexing + # Map buffer index to peak index ind_tuple = torch.unbind(coef_out.indices()) importance_out = importance_sum_out_dense[ind_tuple] @@ -347,8 +777,8 @@ def fit_contribs(cwms, contribs, sequences, cwm_trim_mask, use_hypothetical, lam # Store outputs gap_out = gap[converged] nll_out = nll[converged] - step_out = i[converged,0,0] - step_sizes_out = step_sizes[converged,0,0] + step_out = i[converged, 0, 0] + step_sizes_out = step_sizes[converged, 0, 0] hit_idxs_lst.append(hit_idxs_out.numpy(force=True).T) coefficients_lst.append(scores_out_raw.numpy(force=True)) @@ -373,19 +803,19 @@ def fit_contribs(cwms, contribs, sequences, cwm_trim_mask, use_hypothetical, lam scores_importance = np.concatenate(importance_lst, axis=0) scores_importance_sq = np.concatenate(importance_sq_lst, axis=0) - hits = { - "peak_id": hit_idxs[:,0].astype(np.uint32), - "motif_id": hit_idxs[:,1].astype(np.uint32), - "hit_start": hit_idxs[:,2], + hits: Dict[str, ndarray] = { + "peak_id": hit_idxs[:, 0].astype(np.uint32), + "motif_id": hit_idxs[:, 1].astype(np.uint32), + "hit_start": hit_idxs[:, 2], "hit_coefficient": scores_coefficient, "hit_similarity": 
scores_similarity, "hit_importance": scores_importance, "hit_importance_sq": scores_importance_sq, } - qc = {k: np.concatenate(v, axis=0) for k, v in qc_lsts.items()} + qc: Dict[str, ndarray] = {k: np.concatenate(v, axis=0) for k, v in qc_lsts.items()} hits_df = pl.DataFrame(hits) qc_df = pl.DataFrame(qc) - return hits_df, qc_df \ No newline at end of file + return hits_df, qc_df diff --git a/src/finemo/main.py b/src/finemo/main.py index 852147c..16d03af 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -1,20 +1,88 @@ +"""Main CLI module for the Fi-NeMo motif instance calling pipeline. + +This module provides the command-line interface for all Fi-NeMo operations: +- Data preprocessing from various genomic formats +- Motif hit calling using the Fi-NeMo algorithm +- Report generation and result visualization +- Post-processing operations (hit collapsing, intersection) + +The CLI supports multiple input formats including bigWig, HDF5 (ChromBPNet/BPNet), +and TF-MoDISCo format. +""" + from . import data_io import os import argparse import warnings - - -def extract_regions_bw(peaks_path, chrom_order_path, fa_path, bw_paths, out_path, region_width): +from typing import Optional, List + +import polars as pl + + +def extract_regions_bw( + peaks_path: str, + chrom_order_path: Optional[str], + fa_path: str, + bw_paths: List[str], + out_path: str, + region_width: int, +) -> None: + """Extract genomic regions and contribution scores from bigWig and FASTA files. + + Parameters + ---------- + peaks_path : str + Path to ENCODE NarrowPeak format file. + chrom_order_path : str, optional + Path to chromosome ordering file. + fa_path : str + Path to genome FASTA file. + bw_paths : List[str] + List of bigWig file paths containing contribution scores. + out_path : str + Output path for NPZ file. + region_width : int + Width of regions to extract around peak summits. + + Notes + ----- + BigWig files only provide projected contribution scores. 
+ """ half_width = region_width // 2 + # Load peak regions and extract sequences/contributions peaks_df = data_io.load_peaks(peaks_path, chrom_order_path, half_width) - sequences, contribs = data_io.load_regions_from_bw(peaks_df, fa_path, bw_paths, half_width) + sequences, contribs = data_io.load_regions_from_bw( + peaks_df, fa_path, bw_paths, half_width + ) + # Save processed data to NPZ format data_io.write_regions_npz(sequences, contribs, out_path, peaks_df=peaks_df) -def extract_regions_chrombpnet_h5(peaks_path, chrom_order_path, h5_paths, out_path, region_width): +def extract_regions_chrombpnet_h5( + peaks_path: Optional[str], + chrom_order_path: Optional[str], + h5_paths: List[str], + out_path: str, + region_width: int, +) -> None: + """Extract genomic regions and contribution scores from ChromBPNet HDF5 files. + + Parameters + ---------- + peaks_path : str, optional + Path to ENCODE NarrowPeak format file. If None, lacks absolute coordinates. + chrom_order_path : str, optional + Path to chromosome ordering file. + h5_paths : List[str] + List of ChromBPNet HDF5 file paths. + out_path : str + Output path for NPZ file. + region_width : int + Width of regions to extract around peak summits. + """ half_width = region_width // 2 if peaks_path is not None: @@ -27,7 +95,28 @@ def extract_regions_chrombpnet_h5(peaks_path, chrom_order_path, h5_paths, out_pa data_io.write_regions_npz(sequences, contribs, out_path, peaks_df=peaks_df) -def extract_regions_bpnet_h5(peaks_path, chrom_order_path, h5_paths, out_path, region_width): +def extract_regions_bpnet_h5( + peaks_path: Optional[str], + chrom_order_path: Optional[str], + h5_paths: List[str], + out_path: str, + region_width: int, +) -> None: + """Extract genomic regions and contribution scores from BPNet HDF5 files. + + Parameters + ---------- + peaks_path : str, optional + Path to ENCODE NarrowPeak format file. If None, output lacks absolute coordinates. 
+ chrom_order_path : str, optional + Path to chromosome ordering file. + h5_paths : List[str] + List of BPNet HDF5 file paths. + out_path : str + Output path for NPZ file. + region_width : int + Width of regions to extract around peak summits. + """ half_width = region_width // 2 if peaks_path is not None: @@ -40,7 +129,31 @@ def extract_regions_bpnet_h5(peaks_path, chrom_order_path, h5_paths, out_path, r data_io.write_regions_npz(sequences, contribs, out_path, peaks_df=peaks_df) -def extract_regions_modisco_fmt(peaks_path, chrom_order_path, shaps_paths, ohe_path, out_path, region_width): +def extract_regions_modisco_fmt( + peaks_path: Optional[str], + chrom_order_path: Optional[str], + shaps_paths: List[str], + ohe_path: str, + out_path: str, + region_width: int, +) -> None: + """Extract genomic regions and contribution scores from TF-MoDISCo format files. + + Parameters + ---------- + peaks_path : str, optional + Path to ENCODE NarrowPeak format file. If None, output lacks absolute coordinates. + chrom_order_path : str, optional + Path to chromosome ordering file. + shaps_paths : List[str] + List of paths to .npy/.npz files containing SHAP/attribution scores. + ohe_path : str + Path to .npy/.npz file containing one-hot encoded sequences. + out_path : str + Output path for NPZ file. + region_width : int + Width of regions to extract around peak summits. 
+ """ half_width = region_width // 2 if peaks_path is not None: @@ -48,38 +161,135 @@ def extract_regions_modisco_fmt(peaks_path, chrom_order_path, shaps_paths, ohe_p else: peaks_df = None - sequences, contribs = data_io.load_regions_from_modisco_fmt(shaps_paths, ohe_path, half_width) + sequences, contribs = data_io.load_regions_from_modisco_fmt( + shaps_paths, ohe_path, half_width + ) data_io.write_regions_npz(sequences, contribs, out_path, peaks_df=peaks_df) -def call_hits(regions_path, peaks_path, modisco_h5_path, chrom_order_path, motifs_include_path, motif_names_path, - motif_lambdas_path, out_dir, cwm_trim_coords_path, cwm_trim_thresholds_path, cwm_trim_threshold_default, - lambda_default, step_size_max, step_size_min, sqrt_transform, convergence_tol, max_steps, batch_size, - step_adjust, device, mode, no_post_filter, compile_optimizer): - +def call_hits( + regions_path: str, + peaks_path: Optional[str], + modisco_h5_path: str, + chrom_order_path: Optional[str], + motifs_include_path: Optional[str], + motif_names_path: Optional[str], + motif_lambdas_path: Optional[str], + out_dir: str, + cwm_trim_coords_path: Optional[str], + cwm_trim_thresholds_path: Optional[str], + cwm_trim_threshold_default: float, + lambda_default: float, + step_size_max: float, + step_size_min: float, + sqrt_transform: bool, + convergence_tol: float, + max_steps: int, + batch_size: int, + step_adjust: float, + device: Optional[str], + mode: str, + no_post_filter: bool, + compile_optimizer: bool, +) -> None: + """Call motif hits using the Fi-NeMo algorithm on preprocessed genomic regions. + + This function implements the core Fi-NeMo hit calling pipeline, which identifies + motif instances by solving a sparse reconstruction problem using proximal gradient + descent. The algorithm represents contribution scores as weighted combinations of + motif CWMs at specific positions. 
+ + Parameters + ---------- + regions_path : str + Path to NPZ file containing preprocessed regions (sequences, contributions, + and optional peak coordinates). + peaks_path : str, optional + DEPRECATED. Path to ENCODE NarrowPeak format file. Peak data should be + included during preprocessing instead. + modisco_h5_path : str + Path to TF-MoDISco H5 file containing motif CWMs. + chrom_order_path : str, optional + DEPRECATED. Path to chromosome ordering file. + motifs_include_path : str, optional + Path to file listing motif names to include in analysis. + motif_names_path : str, optional + Path to file mapping motif IDs to custom names. + motif_lambdas_path : str, optional + Path to file specifying per-motif lambda values. + out_dir : str + Output directory for results. + cwm_trim_coords_path : str, optional + Path to file specifying custom motif trimming coordinates. + cwm_trim_thresholds_path : str, optional + Path to file specifying custom motif trimming thresholds. + cwm_trim_threshold_default : float + Default threshold for motif trimming (typically 0.3). + lambda_default : float + Default L1 regularization weight (typically 0.7). + step_size_max : float + Maximum optimization step size. + step_size_min : float + Minimum optimization step size. + sqrt_transform : bool + Whether to apply signed square root transform to contributions. + convergence_tol : float + Convergence tolerance for duality gap. + max_steps : int + Maximum number of optimization steps. + batch_size : int + Batch size for GPU processing. + step_adjust : float + Step size adjustment factor on divergence. + device : str, optional + DEPRECATED. Use CUDA_VISIBLE_DEVICES environment variable instead. + mode : str + Contribution type mode ('pp', 'ph', 'hp', 'hh') where 'p'=projected, 'h'=hypothetical. + no_post_filter : bool + If True, skip post-hit-calling similarity filtering. + compile_optimizer : bool + Whether to JIT-compile the optimizer for speed. 
+ + Notes + ----- + The Fi-NeMo algorithm solves the optimization problem: + minimize_c: ||contribs - reconstruction(c)||²₂ + λ||c||₁ + subject to: c ≥ 0 + + where c represents motif hit coefficients and reconstruction uses convolution + with motif CWMs. + """ + params = locals() + import torch from . import hitcaller if device is not None: - warnings.warn("The `--device` flag is deprecated and will be removed in a future version. Please use the `CUDA_VISIBLE_DEVICES` environment variable to specify the GPU device.") - + warnings.warn( + "The `--device` flag is deprecated and will be removed in a future version. Please use the `CUDA_VISIBLE_DEVICES` environment variable to specify the GPU device." + ) + sequences, contribs, peaks_df, has_peaks = data_io.load_regions_npz(regions_path) region_width = sequences.shape[2] if region_width % 2 != 0: raise ValueError(f"Region width of {region_width} is not divisible by 2.") - + half_width = region_width // 2 num_regions = contribs.shape[0] if peaks_path is not None: - warnings.warn("Providing a peaks file to `call-hits` is deprecated, and this option will be removed in a future version. Peaks should instead be provided in the preprocessing step to be included in `regions.npz`.") + warnings.warn( + "Providing a peaks file to `call-hits` is deprecated, and this option will be removed in a future version. Peaks should instead be provided in the preprocessing step to be included in `regions.npz`." + ) peaks_df = data_io.load_peaks(peaks_path, chrom_order_path, half_width) has_peaks = True if not has_peaks: - warnings.warn("No peak region data provided. Output hits will lack absolute genomic coordinates.") + warnings.warn( + "No peak region data provided. Output hits will lack absolute genomic coordinates." 
+ ) if mode == "pp": motif_type = "cwm" @@ -93,6 +303,10 @@ def call_hits(regions_path, peaks_path, modisco_h5_path, chrom_order_path, motif elif mode == "hh": motif_type = "hcwm" use_hypothetical_contribs = True + else: + raise ValueError( + f"Invalid mode: {mode}. Must be one of 'pp', 'ph', 'hp', 'hh'." + ) if motifs_include_path is not None: motifs_include = data_io.load_txt(motifs_include_path) @@ -118,15 +332,42 @@ def call_hits(regions_path, peaks_path, modisco_h5_path, chrom_order_path, motif trim_thresholds = data_io.load_mapping(cwm_trim_thresholds_path, float) else: trim_thresholds = None - - motifs_df, cwms, trim_masks, motif_names = data_io.load_modisco_motifs(modisco_h5_path, trim_coords, trim_thresholds, cwm_trim_threshold_default, - motif_type, motifs_include, motif_name_map, motif_lambdas, lambda_default, True) + + motifs_df, cwms, trim_masks, _ = data_io.load_modisco_motifs( + modisco_h5_path, + trim_coords, + trim_thresholds, + cwm_trim_threshold_default, + motif_type, + motifs_include, + motif_name_map, + motif_lambdas, + lambda_default, + True, + ) num_motifs = cwms.shape[0] motif_width = cwms.shape[2] lambdas = motifs_df.get_column("lambda").to_numpy(writable=True) - hits_df, qc_df = hitcaller.fit_contribs(cwms, contribs, sequences, trim_masks, use_hypothetical_contribs, lambdas, step_size_max, step_size_min, - sqrt_transform, convergence_tol, max_steps, batch_size, step_adjust, not no_post_filter, device, compile_optimizer) + device_obj = torch.device(device) if device is not None else None + hits_df, qc_df = hitcaller.fit_contribs( + cwms, + contribs, + sequences, + trim_masks, + use_hypothetical_contribs, + lambdas, + step_size_max, + step_size_min, + sqrt_transform, + convergence_tol, + max_steps, + batch_size, + step_adjust, + not no_post_filter, + device_obj, + compile_optimizer, + ) os.makedirs(out_dir, exist_ok=True) out_path_qc = os.path.join(out_dir, "peaks_qc.tsv") @@ -150,28 +391,86 @@ def call_hits(regions_path, peaks_path, 
modisco_h5_path, chrom_order_path, motif data_io.write_params(params, out_path_params) -def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_path, motif_names_path, - out_dir, modisco_region_width, cwm_trim_threshold, compute_recall, use_seqlets): - from . import evaluation, visualization +def report( + regions_path: str, + hits_dir: str, + modisco_h5_path: Optional[str], + peaks_path: Optional[str], + motifs_include_path: Optional[str], + motif_names_path: Optional[str], + out_dir: str, + modisco_region_width: int, + cwm_trim_threshold: float, + compute_recall: bool, + use_seqlets: bool, +) -> None: + """Generate comprehensive HTML report with statistics and visualizations. + + This function creates detailed analysis reports comparing Fi-NeMo hit calling + results with TF-MoDISCo seqlets, including performance metrics, distribution + plots, and motif visualization. The report provides insights into hit calling + quality and motif discovery accuracy. + + Parameters + ---------- + regions_path : str + Path to NPZ file containing the same regions used for hit calling. + hits_dir : str + Path to directory containing Fi-NeMo hit calling outputs. + modisco_h5_path : str, optional + Path to TF-MoDISCo H5 file. If None, seqlet comparisons are skipped. + peaks_path : str, optional + DEPRECATED. Peak coordinates should be included in regions file. + motifs_include_path : str, optional + DEPRECATED. This information is inferred from hit calling outputs. + motif_names_path : str, optional + DEPRECATED. This information is inferred from hit calling outputs. + out_dir : str + Output directory for report files. + modisco_region_width : int + Width of regions used by TF-MoDISCo (needed for coordinate conversion). + cwm_trim_threshold : float + DEPRECATED. This information is inferred from hit calling outputs. + compute_recall : bool + Whether to compute recall metrics against TF-MoDISCo seqlets. 
+ use_seqlets : bool + Whether to include seqlet-based comparisons in the report. + + Notes + ----- + The generated report includes: + - Hit vs seqlet count comparisons + - Motif CWM visualizations + - Hit statistic distributions + - Co-occurrence heatmaps + - Confusion matrices for overlapping motifs + """ + from . import evaluation, visualization sequences, contribs, peaks_df, _ = data_io.load_regions_npz(regions_path) if len(contribs.shape) == 3: regions = contribs * sequences elif len(contribs.shape) == 2: - regions = contribs[:,None,:] * sequences + regions = contribs[:, None, :] * sequences + else: + raise ValueError(f"Unexpected contribs shape: {contribs.shape}") half_width = regions.shape[2] // 2 modisco_half_width = modisco_region_width // 2 if peaks_path is not None: - warnings.warn("Providing a peaks file to `report` is deprecated, and this option will be removed in a future version. Peaks should instead be provided in the preprocessing step to be included in `regions.npz`.") - peaks_df = data_io.load_peaks(peaks_path, None, half_width) + warnings.warn( + "Providing a peaks file to `report` is deprecated, and this option will be removed in a future version. Peaks should instead be provided in the preprocessing step to be included in `regions.npz`." + ) + peaks_df = data_io.load_peaks(peaks_path, None, half_width) if hits_dir.endswith(".tsv"): - warnings.warn("Passing a hits.tsv file to `finemo report` is deprecated. Please provide the directory containing the hits.tsv file instead.") + warnings.warn( + "Passing a hits.tsv file to `finemo report` is deprecated. Please provide the directory containing the hits.tsv file instead." 
+ ) hits_path = hits_dir - + hits_df = data_io.load_hits(hits_path, lazy=True) if motifs_include_path is not None: @@ -180,12 +479,29 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p motifs_include = None if motif_names_path is not None: - motif_name_map = data_io.load_txt(motif_names_path) + motif_name_map = data_io.load_mapping(motif_names_path, str) else: motif_name_map = None - motifs_df, cwms_modisco, trim_masks, motif_names = data_io.load_modisco_motifs(modisco_h5_path, None, None, cwm_trim_threshold, "cwm", - motifs_include, motif_name_map, None, None, True) + if modisco_h5_path is not None: + motifs_df, cwms_modisco, _, motif_names = data_io.load_modisco_motifs( + modisco_h5_path, + None, + None, + cwm_trim_threshold, + "cwm", + motifs_include, + motif_name_map, + None, + 1.0, + True, + ) + else: + # When no modisco_h5_path is provided in legacy TSV mode, we can't compute motifs + # This will cause an error later, but that's expected behavior + raise ValueError( + "modisco_h5_path is required when providing a hits.tsv file directly" + ) else: hits_df_path = os.path.join(hits_dir, "hits.tsv") @@ -202,64 +518,150 @@ def report(regions_path, hits_dir, modisco_h5_path, peaks_path, motifs_include_p cwm_trim_threshold = params["cwm_trim_threshold_default"] if not use_seqlets: - warnings.warn("Usage of the `--no-seqlets` flag is deprecated and will be removed in a future version. Please omit the `--modisco-h5` argument instead.") + warnings.warn( + "Usage of the `--no-seqlets` flag is deprecated and will be removed in a future version. Please omit the `--modisco-h5` argument instead." 
+ ) seqlets_df = None elif modisco_h5_path is None: compute_recall = False seqlets_df = None else: - seqlets_df = data_io.load_modisco_seqlets(modisco_h5_path, peaks_df, motifs_df, half_width, modisco_half_width, lazy=True) + seqlets_df = data_io.load_modisco_seqlets( + modisco_h5_path, + peaks_df, + motifs_df, + half_width, + modisco_half_width, + lazy=True, + ) motif_width = cwms_modisco.shape[2] - occ_df, coooc = evaluation.get_motif_occurences(hits_df, motif_names) + # Convert to LazyFrame if needed and ensure motif_names is a list + if isinstance(hits_df, pl.LazyFrame): + hits_df_lazy: pl.LazyFrame = hits_df + else: + hits_df_lazy: pl.LazyFrame = hits_df.lazy() + + motif_names_list: List[str] = list(motif_names) + + occ_df, coooc = evaluation.get_motif_occurences(hits_df_lazy, motif_names_list) + + report_data, report_df, cwms, trim_bounds = evaluation.tfmodisco_comparison( + regions, + hits_df, + peaks_df, + seqlets_df, + motifs_df, + cwms_modisco, + motif_names_list, + modisco_half_width, + motif_width, + compute_recall, + ) - report_data, report_df, cwms, trim_bounds = evaluation.tfmodisco_comparison(regions, hits_df, peaks_df, seqlets_df, motifs_df, - cwms_modisco, motif_names, modisco_half_width, - motif_width, compute_recall) - if seqlets_df is not None: - confusion_df, confusion_mat = evaluation.seqlet_confusion(hits_df, seqlets_df, peaks_df, motif_names, motif_width) - + confusion_df, confusion_mat = evaluation.seqlet_confusion( + hits_df, seqlets_df, peaks_df, motif_names_list, motif_width + ) + else: + confusion_df, confusion_mat = None, None + os.makedirs(out_dir, exist_ok=True) - + occ_path = os.path.join(out_dir, "motif_occurrences.tsv") data_io.write_occ_df(occ_df, occ_path) data_io.write_report_data(report_df, cwms, out_dir) - visualization.plot_hit_stat_distributions(hits_df, motif_names, out_dir) - visualization.plot_hit_peak_distributions(occ_df, motif_names, out_dir) - visualization.plot_peak_motif_indicator_heatmap(coooc, motif_names, 
out_dir) + visualization.plot_hit_stat_distributions(hits_df_lazy, motif_names_list, out_dir) + visualization.plot_hit_peak_distributions(occ_df, motif_names_list, out_dir) + visualization.plot_peak_motif_indicator_heatmap(coooc, motif_names_list, out_dir) plot_dir = os.path.join(out_dir, "CWMs") visualization.plot_cwms(cwms, trim_bounds, plot_dir) if seqlets_df is not None: - seqlets_df = seqlets_df.collect() + seqlets_collected = ( + seqlets_df.collect() if isinstance(seqlets_df, pl.LazyFrame) else seqlets_df + ) seqlets_path = os.path.join(out_dir, "seqlets.tsv") - data_io.write_modisco_seqlets(seqlets_df, seqlets_path) + data_io.write_modisco_seqlets(seqlets_collected, seqlets_path) - seqlet_confusion_path = os.path.join(out_dir, "seqlet_confusion.tsv") - data_io.write_seqlet_confusion_df(confusion_df, seqlet_confusion_path) + if confusion_df is not None and confusion_mat is not None: + seqlet_confusion_path = os.path.join(out_dir, "seqlet_confusion.tsv") + data_io.write_seqlet_confusion_df(confusion_df, seqlet_confusion_path) - visualization.plot_hit_vs_seqlet_counts(report_data, out_dir) - visualization.plot_seqlet_confusion_heatmap(confusion_mat, motif_names, out_dir) + visualization.plot_hit_vs_seqlet_counts(report_data, out_dir) + visualization.plot_seqlet_confusion_heatmap( + confusion_mat, motif_names_list, out_dir + ) report_path = os.path.join(out_dir, "report.html") - visualization.write_report(report_df, motif_names, report_path, compute_recall, seqlets_df is not None) - - -def collapse_hits(hits_path, out_path, overlap_frac): + visualization.write_report( + report_df, motif_names_list, report_path, compute_recall, seqlets_df is not None + ) + + +def collapse_hits(hits_path: str, out_path: str, overlap_frac: float) -> None: + """Collapse overlapping hits by selecting the best hit per overlapping group. 
+ + This function processes a set of motif hits and identifies overlapping hits, + keeping only the hit with the highest similarity score within each overlapping + group. This reduces redundancy in hit calls while preserving the most confident + predictions. + + Parameters + ---------- + hits_path : str + Path to input TSV file containing hit data (hits.tsv or hits_unique.tsv). + out_path : str + Path to output TSV file with additional 'is_primary' column. + overlap_frac : float + Minimum fractional overlap for considering hits as overlapping. + For hits of lengths x and y, minimum overlap = overlap_frac * (x + y) / 2. + + Notes + ----- + The algorithm uses a sweep line approach with a heap data structure to + efficiently identify overlapping intervals and select the best hit based + on similarity scores. + """ from . import postprocessing hits_df = data_io.load_hits(hits_path, lazy=False) hits_collapsed_df = postprocessing.collapse_hits(hits_df, overlap_frac) - data_io.write_hits_processed(hits_collapsed_df, out_path, schema=data_io.HITS_COLLAPSED_DTYPES) - - -def intersect_hits(hits_paths, out_path, relaxed): + data_io.write_hits_processed( + hits_collapsed_df, out_path, schema=data_io.HITS_COLLAPSED_DTYPES + ) + + +def intersect_hits(hits_paths: List[str], out_path: str, relaxed: bool) -> None: + """Find intersection of hits across multiple Fi-NeMo runs. + + This function identifies motif hits that are consistently called across + multiple independent runs, providing a way to assess reproducibility and + identify high-confidence hits. + + Parameters + ---------- + hits_paths : List[str] + List of paths to input TSV files from different runs. + out_path : str + Path to output TSV file containing intersection results. + Duplicate columns are suffixed with run index. + relaxed : bool + If True, uses relaxed intersection criteria based only on motif names + and untrimmed coordinates. If False, assumes consistent region definitions + and motif trimming across runs. 
+ + Notes + ----- + The strict intersection mode requires consistent input regions and motif + processing parameters across all runs. The relaxed mode is more permissive + but may not be suitable when genomic coordinates are unavailable. + """ from . import postprocessing hits_dfs = [data_io.load_hits(hits_path, lazy=False) for hits_path in hits_paths] @@ -268,260 +670,692 @@ def intersect_hits(hits_paths, out_path, relaxed): data_io.write_hits_processed(hits_df, out_path, schema=None) -def cli(): - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers(required=True, dest='cmd') - - - extract_regions_bw_parser = subparsers.add_parser("extract-regions-bw", formatter_class=argparse.ArgumentDefaultsHelpFormatter, - help="Extract sequences and contributions from FASTA and bigwig files.") - - extract_regions_bw_parser.add_argument("-p", "--peaks", type=str, required=True, - help="A peak regions file in ENCODE NarrowPeak format.") - extract_regions_bw_parser.add_argument("-C", "--chrom-order", type=str, default=None, - help="A tab-delimited file with chromosome names in the first column to define sort order of chromosomes. Missing chromosomes are ordered as they appear in -p/--peaks.") - extract_regions_bw_parser.add_argument("-f", "--fasta", type=str, required=True, - help="A genome FASTA file. If an .fai index file doesn't exist in the same directory, it will be created.") - extract_regions_bw_parser.add_argument("-b", "--bigwigs", type=str, required=True, nargs='+', - help="One or more bigwig files of contribution scores, with paths delimited by whitespace. 
Scores are averaged across files.") - - extract_regions_bw_parser.add_argument("-o", "--out-path", type=str, required=True, - help="The path to the output .npz file.") - - extract_regions_bw_parser.add_argument("-w", "--region-width", type=int, default=1000, - help="The width of the input region centered around each peak summit.") - - - extract_chrombpnet_regions_h5_parser = subparsers.add_parser("extract-regions-chrombpnet-h5", formatter_class=argparse.ArgumentDefaultsHelpFormatter, - help="Extract sequences and contributions from ChromBPNet contributions H5 files.") - - extract_chrombpnet_regions_h5_parser.add_argument("-p", "--peaks", type=str, default=None, - help="A peak regions file in ENCODE NarrowPeak format. If omitted, downstream outputs will lack absolute genomic coordinates.") - extract_chrombpnet_regions_h5_parser.add_argument("-C", "--chrom-order", type=str, default=None, - help="A tab-delimited file with chromosome names in the first column to define sort order of chromosomes. Missing chromosomes are ordered as they appear in -p/--peaks.") - - extract_chrombpnet_regions_h5_parser.add_argument("-c", "--h5s", type=str, required=True, nargs='+', - help="One or more H5 files of contribution scores, with paths delimited by whitespace. Scores are averaged across files.") - - extract_chrombpnet_regions_h5_parser.add_argument("-o", "--out-path", type=str, required=True, - help="The path to the output .npz file.") - - extract_chrombpnet_regions_h5_parser.add_argument("-w", "--region-width", type=int, default=1000, - help="The width of the input region centered around each peak summit.") - - - extract_regions_h5_parser = subparsers.add_parser("extract-regions-h5", formatter_class=argparse.ArgumentDefaultsHelpFormatter, - help="Extract sequences and contributions from ChromBPNet contributions H5 files. 
DEPRECATED: Use `extract-regions-chrombpnet-h5` instead.") - - extract_regions_h5_parser.add_argument("-p", "--peaks", type=str, default=None, - help="A peak regions file in ENCODE NarrowPeak format. If omitted, downstream outputs will lack absolute genomic coordinates.") - extract_regions_h5_parser.add_argument("-C", "--chrom-order", type=str, default=None, - help="A tab-delimited file with chromosome names in the first column to define sort order of chromosomes. Missing chromosomes are ordered as they appear in -p/--peaks.") - - extract_regions_h5_parser.add_argument("-c", "--h5s", type=str, required=True, nargs='+', - help="One or more H5 files of contribution scores, with paths delimited by whitespace. Scores are averaged across files.") - - extract_regions_h5_parser.add_argument("-o", "--out-path", type=str, required=True, - help="The path to the output .npz file.") - - extract_regions_h5_parser.add_argument("-w", "--region-width", type=int, default=1000, - help="The width of the input region centered around each peak summit.") - - - extract_bpnet_regions_h5_parser = subparsers.add_parser("extract-regions-bpnet-h5", formatter_class=argparse.ArgumentDefaultsHelpFormatter, - help="Extract sequences and contributions from BPNet contributions H5 files.") - - extract_bpnet_regions_h5_parser.add_argument("-p", "--peaks", type=str, default=None, - help="A peak regions file in ENCODE NarrowPeak format. If omitted, downstream outputs will lack absolute genomic coordinates.") - extract_bpnet_regions_h5_parser.add_argument("-C", "--chrom-order", type=str, default=None, - help="A tab-delimited file with chromosome names in the first column to define sort order of chromosomes. Missing chromosomes are ordered as they appear in -p/--peaks.") - - extract_bpnet_regions_h5_parser.add_argument("-c", "--h5s", type=str, required=True, nargs='+', - help="One or more H5 files of contribution scores, with paths delimited by whitespace. 
Scores are averaged across files.") - - extract_bpnet_regions_h5_parser.add_argument("-o", "--out-path", type=str, required=True, - help="The path to the output .npz file.") - - extract_bpnet_regions_h5_parser.add_argument("-w", "--region-width", type=int, default=1000, - help="The width of the input region centered around each peak summit.") - - - extract_regions_modisco_fmt_parser = subparsers.add_parser("extract-regions-modisco-fmt", formatter_class=argparse.ArgumentDefaultsHelpFormatter, - help="Extract sequences and contributions from tfmodisco-lite input files.") - - extract_regions_modisco_fmt_parser.add_argument("-p", "--peaks", type=str, default=None, - help="A peak regions file in ENCODE NarrowPeak format. If omitted, downstream outputs will lack absolute genomic coordinates.") - extract_regions_modisco_fmt_parser.add_argument("-C", "--chrom-order", type=str, default=None, - help="A tab-delimited file with chromosome names in the first column to define sort order of chromosomes. Missing chromosomes are ordered as they appear in -p/--peaks.") - - extract_regions_modisco_fmt_parser.add_argument("-s", "--sequences", type=str, required=True, - help="A .npy or .npz file containing one-hot encoded sequences.") - - extract_regions_modisco_fmt_parser.add_argument("-a", "--attributions", type=str, required=True, nargs='+', - help="One or more .npy or .npz files of hypothetical contribution scores, with paths delimited by whitespace. 
Scores are averaged across files.") - - extract_regions_modisco_fmt_parser.add_argument("-o", "--out-path", type=str, required=True, - help="The path to the output .npz file.") - - extract_regions_modisco_fmt_parser.add_argument("-w", "--region-width", type=int, default=1000, - help="The width of the input region centered around each peak summit.") - - - call_hits_parser = subparsers.add_parser("call-hits", formatter_class=argparse.ArgumentDefaultsHelpFormatter, - help="Call hits on provided sequences, contributions, and motif CWM's.") - - call_hits_parser.add_argument("-M", "--mode", type=str, default="pp", choices={"pp", "ph", "hp", "hh"}, - help="The type of attributions to use for CWM's and input contribution scores, respectively. 'h' for hypothetical and 'p' for projected.") - - call_hits_parser.add_argument("-r", "--regions", type=str, required=True, - help="A .npz file of input sequences, contributions, and coordinates. Can be generated using `finemo extract-regions-*` subcommands.") - call_hits_parser.add_argument("-m", "--modisco-h5", type=str, required=True, - help="A tfmodisco-lite output H5 file of motif patterns.") - - call_hits_parser.add_argument("-p", "--peaks", type=str, default=None, - help="DEPRECATED: Please provide this file to a preprocessing `finemo extract-regions-*` subcommand instead.") - call_hits_parser.add_argument("-C", "--chrom-order", type=str, default=None, - help="DEPRECATED: Please provide this file to a preprocessing `finemo extract-regions-*` subcommand instead.") - - call_hits_parser.add_argument("-I", "--motifs-include", type=str, default=None, - help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column to include in hit calling. 
If omitted, all motifs in the modisco H5 file are used.") - call_hits_parser.add_argument("-N", "--motif-names", type=str, default=None, - help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column and custom names in the second column. Omitted motifs default to tfmodisco names.") - - call_hits_parser.add_argument("-o", "--out-dir", type=str, required=True, - help="The path to the output directory.") - - call_hits_parser.add_argument("-t", "--cwm-trim-threshold", type=float, default=0.3, - help="The default threshold to determine motif start and end positions within the full CWMs.") - call_hits_parser.add_argument("-T", "--cwm-trim-thresholds", type=str, default=None, - help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column and custom trim thresholds in the second column. Omitted motifs default to the `--cwm-trim-threshold` value.") - call_hits_parser.add_argument("-R", "--cwm-trim-coords", type=str, default=None, - help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column and custom trim start and end coordinates in the second and third columns, respectively. Omitted motifs default to `--cwm-trim-thresholds` values.") - - call_hits_parser.add_argument("-l", "--global-lambda", type=float, default=0.7, - help="The default L1 regularization weight determining the sparsity of hits.") - call_hits_parser.add_argument("-L", "--motif-lambdas", type=str, default=None, - help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column and motif-specific lambdas in the second column. 
Omitted motifs default to the `--global-lambda` value.") - call_hits_parser.add_argument("-a", "--alpha", type=float, default=None, - help="DEPRECATED: Please use the `--lambda` argument instead.") - call_hits_parser.add_argument("-A", "--motif-alphas", type=str, default=None, - help="DEPRECATED: Please use the `--motif-lambdas` argument instead.") - - call_hits_parser.add_argument("-f", "--no-post-filter", action='store_true', - help="Do not perform post-hit-calling filtering. By default, hits are filtered based on a minimum cosine similarity of `lambda` with the input contributions.") - call_hits_parser.add_argument("-q", "--sqrt-transform", action='store_true', - help="Apply a signed square root transform to the input contributions and CWMs before hit calling.") - call_hits_parser.add_argument("-s", "--step-size-max", type=float, default=3., - help="The maximum optimizer step size.") - call_hits_parser.add_argument("-i", "--step-size-min", type=float, default=0.08, - help="The minimum optimizer step size.") - call_hits_parser.add_argument("-j", "--step-adjust", type=float, default=0.7, - help="The optimizer step size adjustment factor. If the optimizer diverges, the step size is multiplicatively adjusted by this factor") - call_hits_parser.add_argument("-c", "--convergence-tol", type=float, default=0.0005, - help="The tolerance for determining convergence. 
The optimizer exits when the duality gap is less than the tolerance.") - call_hits_parser.add_argument("-S", "--max-steps", type=int, default=10000, - help="The maximum number of optimization steps.") - call_hits_parser.add_argument("-b", "--batch-size", type=int, default=2000, - help="The batch size used for optimization.") - call_hits_parser.add_argument("-d", "--device", type=str, default=None, - help="DEPRECATED: Please use the `CUDA_VISIBLE_DEVICES` environment variable to specify the GPU device.") - call_hits_parser.add_argument("-J", "--compile", action='store_true', - help="JIT-compile the optimizer for faster performance. This may not be supported on older GPUs.") - - - report_parser = subparsers.add_parser("report", formatter_class=argparse.ArgumentDefaultsHelpFormatter, - help="Generate statistics and visualizations from hits and tfmodisco-lite motif data.") - - report_parser.add_argument("-r", "--regions", type=str, required=True, - help="A .npz file containing input sequences, contributions, and coordinates. Must be the same as that used for `finemo call-hits`.") - report_parser.add_argument("-H", "--hits", type=str, required=True, - help="The output directory generated by the `finemo call-hits` command on the regions specified in `--regions`.") - report_parser.add_argument("-p", "--peaks", type=str, default=None, - help="DEPRECATED: Please provide this file to a preprocessing `finemo extract-regions-*` subcommand instead.") - report_parser.add_argument("-m", "--modisco-h5", type=str, default=None, - help="The tfmodisco-lite output H5 file of motif patterns. Must be the same as that used for hit calling unless `--no-recall` is set. 
If omitted, seqlet-derived metrics will not be computed.") - report_parser.add_argument("-I", "--motifs-include", type=str, default=None, - help="DEPRECATED: This information is now inferred from the outputs of `finemo call-hits`.") - report_parser.add_argument("-N", "--motif-names", type=str, default=None, - help="DEPRECATED: This information is now inferred from the outputs of `finemo call-hits`.") - - report_parser.add_argument("-o", "--out-dir", type=str, required=True, - help="The path to the report output directory.") - - report_parser.add_argument("-W", "--modisco-region-width", type=int, default=400, - help="The width of the region around each peak summit used by tfmodisco-lite.") - report_parser.add_argument("-t", "--cwm-trim-threshold", type=float, default=0.3, - help="DEPRECATED: This information is now inferred from the outputs of `finemo call-hits`.") - report_parser.add_argument("-n", "--no-recall", action='store_true', - help="Do not compute motif recall metrics.") - report_parser.add_argument("-s", "--no-seqlets", action='store_true', - help="DEPRECATED: Please omit the `--modisco-h5` argument instead.") - - - collapse_hits_parser = subparsers.add_parser("collapse-hits", formatter_class=argparse.ArgumentDefaultsHelpFormatter, - help="Identify best hit by motif similarity among sets of overlapping hits.") - - collapse_hits_parser.add_argument("-i", "--hits", type=str, required=True, - help="The `hits.tsv` or `hits_unique.tsv` file from `call-hits`.") - collapse_hits_parser.add_argument("-o", "--out-path", type=str, required=True, - help="The path to the output .tsv file with an additional \"is_primary\" column.") - collapse_hits_parser.add_argument("-O", "--overlap-frac", type=float, default=0.2, - help="The threshold for determining overlapping hits. For two hits with lengths x and y, the minimum overlap is defined as `overlap_frac * (x + y) / 2`. 
The default value of 0.2 means that two hits must overlap by at least 20% of their average lengths to be considered overlapping.") - - - intersect_hits_parser = subparsers.add_parser("intersect-hits", formatter_class=argparse.ArgumentDefaultsHelpFormatter, - help="Intersect hits across multiple runs.") - - intersect_hits_parser.add_argument("-i", "--hits", type=str, required=True, nargs='+', - help="One or more hits.tsv or hits_unique.tsv files, with paths delimited by whitespace.") - intersect_hits_parser.add_argument("-o", "--out-path", type=str, required=True, - help="The path to the output .tsv file. Duplicate columns are suffixed with the positional index of the input file.") - intersect_hits_parser.add_argument("-r", "--relaxed", action='store_true', - help="Use relaxed intersection criteria, using only motif names and untrimmed coordinates. By default, the intersection assumes consistent region definitions and motif trimming. This option is not recommended if genomic coordinates are unavailable.") +def cli() -> None: + """Command-line interface for the Fi-NeMo motif instance calling pipeline. + This function provides the main entry point for all Fi-NeMo operations including: + - Data preprocessing from various formats (bigWig, HDF5, TF-MoDISCo) + - Motif hit calling using the Fi-NeMo algorithm + - Report generation and visualization + - Post-processing operations (hit collapsing, intersection) + + The CLI supports comprehensive workflows for transcription factor motif + analysis from raw genomic data to publication-ready visualizations. 
+ """ + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(required=True, dest="cmd") + + extract_regions_bw_parser = subparsers.add_parser( + "extract-regions-bw", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + help="Extract sequences and contributions from FASTA and bigwig files.", + ) + + extract_regions_bw_parser.add_argument( + "-p", + "--peaks", + type=str, + required=True, + help="A peak regions file in ENCODE NarrowPeak format.", + ) + extract_regions_bw_parser.add_argument( + "-C", + "--chrom-order", + type=str, + default=None, + help="A tab-delimited file with chromosome names in the first column to define sort order of chromosomes. Missing chromosomes are ordered as they appear in -p/--peaks.", + ) + extract_regions_bw_parser.add_argument( + "-f", + "--fasta", + type=str, + required=True, + help="A genome FASTA file. If an .fai index file doesn't exist in the same directory, it will be created.", + ) + extract_regions_bw_parser.add_argument( + "-b", + "--bigwigs", + type=str, + required=True, + nargs="+", + help="One or more bigwig files of contribution scores, with paths delimited by whitespace. Scores are averaged across files.", + ) + + extract_regions_bw_parser.add_argument( + "-o", + "--out-path", + type=str, + required=True, + help="The path to the output .npz file.", + ) + + extract_regions_bw_parser.add_argument( + "-w", + "--region-width", + type=int, + default=1000, + help="The width of the input region centered around each peak summit.", + ) + + extract_chrombpnet_regions_h5_parser = subparsers.add_parser( + "extract-regions-chrombpnet-h5", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + help="Extract sequences and contributions from ChromBPNet contributions H5 files.", + ) + + extract_chrombpnet_regions_h5_parser.add_argument( + "-p", + "--peaks", + type=str, + default=None, + help="A peak regions file in ENCODE NarrowPeak format. 
If omitted, downstream outputs will lack absolute genomic coordinates.", + ) + extract_chrombpnet_regions_h5_parser.add_argument( + "-C", + "--chrom-order", + type=str, + default=None, + help="A tab-delimited file with chromosome names in the first column to define sort order of chromosomes. Missing chromosomes are ordered as they appear in -p/--peaks.", + ) + + extract_chrombpnet_regions_h5_parser.add_argument( + "-c", + "--h5s", + type=str, + required=True, + nargs="+", + help="One or more H5 files of contribution scores, with paths delimited by whitespace. Scores are averaged across files.", + ) + + extract_chrombpnet_regions_h5_parser.add_argument( + "-o", + "--out-path", + type=str, + required=True, + help="The path to the output .npz file.", + ) + + extract_chrombpnet_regions_h5_parser.add_argument( + "-w", + "--region-width", + type=int, + default=1000, + help="The width of the input region centered around each peak summit.", + ) + + extract_regions_h5_parser = subparsers.add_parser( + "extract-regions-h5", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + help="Extract sequences and contributions from ChromBPNet contributions H5 files. DEPRECATED: Use `extract-regions-chrombpnet-h5` instead.", + ) + + extract_regions_h5_parser.add_argument( + "-p", + "--peaks", + type=str, + default=None, + help="A peak regions file in ENCODE NarrowPeak format. If omitted, downstream outputs will lack absolute genomic coordinates.", + ) + extract_regions_h5_parser.add_argument( + "-C", + "--chrom-order", + type=str, + default=None, + help="A tab-delimited file with chromosome names in the first column to define sort order of chromosomes. Missing chromosomes are ordered as they appear in -p/--peaks.", + ) + + extract_regions_h5_parser.add_argument( + "-c", + "--h5s", + type=str, + required=True, + nargs="+", + help="One or more H5 files of contribution scores, with paths delimited by whitespace. 
Scores are averaged across files.", + ) + + extract_regions_h5_parser.add_argument( + "-o", + "--out-path", + type=str, + required=True, + help="The path to the output .npz file.", + ) + + extract_regions_h5_parser.add_argument( + "-w", + "--region-width", + type=int, + default=1000, + help="The width of the input region centered around each peak summit.", + ) + + extract_bpnet_regions_h5_parser = subparsers.add_parser( + "extract-regions-bpnet-h5", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + help="Extract sequences and contributions from BPNet contributions H5 files.", + ) + + extract_bpnet_regions_h5_parser.add_argument( + "-p", + "--peaks", + type=str, + default=None, + help="A peak regions file in ENCODE NarrowPeak format. If omitted, downstream outputs will lack absolute genomic coordinates.", + ) + extract_bpnet_regions_h5_parser.add_argument( + "-C", + "--chrom-order", + type=str, + default=None, + help="A tab-delimited file with chromosome names in the first column to define sort order of chromosomes. Missing chromosomes are ordered as they appear in -p/--peaks.", + ) + + extract_bpnet_regions_h5_parser.add_argument( + "-c", + "--h5s", + type=str, + required=True, + nargs="+", + help="One or more H5 files of contribution scores, with paths delimited by whitespace. 
Scores are averaged across files.", + ) + + extract_bpnet_regions_h5_parser.add_argument( + "-o", + "--out-path", + type=str, + required=True, + help="The path to the output .npz file.", + ) + + extract_bpnet_regions_h5_parser.add_argument( + "-w", + "--region-width", + type=int, + default=1000, + help="The width of the input region centered around each peak summit.", + ) + + extract_regions_modisco_fmt_parser = subparsers.add_parser( + "extract-regions-modisco-fmt", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + help="Extract sequences and contributions from tfmodisco-lite input files.", + ) + + extract_regions_modisco_fmt_parser.add_argument( + "-p", + "--peaks", + type=str, + default=None, + help="A peak regions file in ENCODE NarrowPeak format. If omitted, downstream outputs will lack absolute genomic coordinates.", + ) + extract_regions_modisco_fmt_parser.add_argument( + "-C", + "--chrom-order", + type=str, + default=None, + help="A tab-delimited file with chromosome names in the first column to define sort order of chromosomes. Missing chromosomes are ordered as they appear in -p/--peaks.", + ) + + extract_regions_modisco_fmt_parser.add_argument( + "-s", + "--sequences", + type=str, + required=True, + help="A .npy or .npz file containing one-hot encoded sequences.", + ) + + extract_regions_modisco_fmt_parser.add_argument( + "-a", + "--attributions", + type=str, + required=True, + nargs="+", + help="One or more .npy or .npz files of hypothetical contribution scores, with paths delimited by whitespace. 
Scores are averaged across files.", + ) + + extract_regions_modisco_fmt_parser.add_argument( + "-o", + "--out-path", + type=str, + required=True, + help="The path to the output .npz file.", + ) + + extract_regions_modisco_fmt_parser.add_argument( + "-w", + "--region-width", + type=int, + default=1000, + help="The width of the input region centered around each peak summit.", + ) + + call_hits_parser = subparsers.add_parser( + "call-hits", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + help="Call hits on provided sequences, contributions, and motif CWM's.", + ) + + call_hits_parser.add_argument( + "-M", + "--mode", + type=str, + default="pp", + choices={"pp", "ph", "hp", "hh"}, + help="The type of attributions to use for CWM's and input contribution scores, respectively. 'h' for hypothetical and 'p' for projected.", + ) + + call_hits_parser.add_argument( + "-r", + "--regions", + type=str, + required=True, + help="A .npz file of input sequences, contributions, and coordinates. Can be generated using `finemo extract-regions-*` subcommands.", + ) + call_hits_parser.add_argument( + "-m", + "--modisco-h5", + type=str, + required=True, + help="A tfmodisco-lite output H5 file of motif patterns.", + ) + + call_hits_parser.add_argument( + "-p", + "--peaks", + type=str, + default=None, + help="DEPRECATED: Please provide this file to a preprocessing `finemo extract-regions-*` subcommand instead.", + ) + call_hits_parser.add_argument( + "-C", + "--chrom-order", + type=str, + default=None, + help="DEPRECATED: Please provide this file to a preprocessing `finemo extract-regions-*` subcommand instead.", + ) + + call_hits_parser.add_argument( + "-I", + "--motifs-include", + type=str, + default=None, + help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column to include in hit calling. 
If omitted, all motifs in the modisco H5 file are used.", + ) + call_hits_parser.add_argument( + "-N", + "--motif-names", + type=str, + default=None, + help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column and custom names in the second column. Omitted motifs default to tfmodisco names.", + ) + + call_hits_parser.add_argument( + "-o", + "--out-dir", + type=str, + required=True, + help="The path to the output directory.", + ) + + call_hits_parser.add_argument( + "-t", + "--cwm-trim-threshold", + type=float, + default=0.3, + help="The default threshold to determine motif start and end positions within the full CWMs.", + ) + call_hits_parser.add_argument( + "-T", + "--cwm-trim-thresholds", + type=str, + default=None, + help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column and custom trim thresholds in the second column. Omitted motifs default to the `--cwm-trim-threshold` value.", + ) + call_hits_parser.add_argument( + "-R", + "--cwm-trim-coords", + type=str, + default=None, + help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column and custom trim start and end coordinates in the second and third columns, respectively. Omitted motifs default to `--cwm-trim-thresholds` values.", + ) + + call_hits_parser.add_argument( + "-l", + "--global-lambda", + type=float, + default=0.7, + help="The default L1 regularization weight determining the sparsity of hits.", + ) + call_hits_parser.add_argument( + "-L", + "--motif-lambdas", + type=str, + default=None, + help="A tab-delimited file with tfmodisco motif names (e.g pos_patterns.pattern_0) in the first column and motif-specific lambdas in the second column. 
Omitted motifs default to the `--global-lambda` value.", + ) + call_hits_parser.add_argument( + "-a", + "--alpha", + type=float, + default=None, + help="DEPRECATED: Please use the `--lambda` argument instead.", + ) + call_hits_parser.add_argument( + "-A", + "--motif-alphas", + type=str, + default=None, + help="DEPRECATED: Please use the `--motif-lambdas` argument instead.", + ) + + call_hits_parser.add_argument( + "-f", + "--no-post-filter", + action="store_true", + help="Do not perform post-hit-calling filtering. By default, hits are filtered based on a minimum cosine similarity of `lambda` with the input contributions.", + ) + call_hits_parser.add_argument( + "-q", + "--sqrt-transform", + action="store_true", + help="Apply a signed square root transform to the input contributions and CWMs before hit calling.", + ) + call_hits_parser.add_argument( + "-s", + "--step-size-max", + type=float, + default=3.0, + help="The maximum optimizer step size.", + ) + call_hits_parser.add_argument( + "-i", + "--step-size-min", + type=float, + default=0.08, + help="The minimum optimizer step size.", + ) + call_hits_parser.add_argument( + "-j", + "--step-adjust", + type=float, + default=0.7, + help="The optimizer step size adjustment factor. If the optimizer diverges, the step size is multiplicatively adjusted by this factor", + ) + call_hits_parser.add_argument( + "-c", + "--convergence-tol", + type=float, + default=0.0005, + help="The tolerance for determining convergence. 
The optimizer exits when the duality gap is less than the tolerance.", + ) + call_hits_parser.add_argument( + "-S", + "--max-steps", + type=int, + default=10000, + help="The maximum number of optimization steps.", + ) + call_hits_parser.add_argument( + "-b", + "--batch-size", + type=int, + default=2000, + help="The batch size used for optimization.", + ) + call_hits_parser.add_argument( + "-d", + "--device", + type=str, + default=None, + help="DEPRECATED: Please use the `CUDA_VISIBLE_DEVICES` environment variable to specify the GPU device.", + ) + call_hits_parser.add_argument( + "-J", + "--compile", + action="store_true", + help="JIT-compile the optimizer for faster performance. This may not be supported on older GPUs.", + ) + + report_parser = subparsers.add_parser( + "report", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + help="Generate statistics and visualizations from hits and tfmodisco-lite motif data.", + ) + + report_parser.add_argument( + "-r", + "--regions", + type=str, + required=True, + help="A .npz file containing input sequences, contributions, and coordinates. Must be the same as that used for `finemo call-hits`.", + ) + report_parser.add_argument( + "-H", + "--hits", + type=str, + required=True, + help="The output directory generated by the `finemo call-hits` command on the regions specified in `--regions`.", + ) + report_parser.add_argument( + "-p", + "--peaks", + type=str, + default=None, + help="DEPRECATED: Please provide this file to a preprocessing `finemo extract-regions-*` subcommand instead.", + ) + report_parser.add_argument( + "-m", + "--modisco-h5", + type=str, + default=None, + help="The tfmodisco-lite output H5 file of motif patterns. Must be the same as that used for hit calling unless `--no-recall` is set. 
If omitted, seqlet-derived metrics will not be computed.", + ) + report_parser.add_argument( + "-I", + "--motifs-include", + type=str, + default=None, + help="DEPRECATED: This information is now inferred from the outputs of `finemo call-hits`.", + ) + report_parser.add_argument( + "-N", + "--motif-names", + type=str, + default=None, + help="DEPRECATED: This information is now inferred from the outputs of `finemo call-hits`.", + ) + + report_parser.add_argument( + "-o", + "--out-dir", + type=str, + required=True, + help="The path to the report output directory.", + ) + + report_parser.add_argument( + "-W", + "--modisco-region-width", + type=int, + default=400, + help="The width of the region around each peak summit used by tfmodisco-lite.", + ) + report_parser.add_argument( + "-t", + "--cwm-trim-threshold", + type=float, + default=0.3, + help="DEPRECATED: This information is now inferred from the outputs of `finemo call-hits`.", + ) + report_parser.add_argument( + "-n", + "--no-recall", + action="store_true", + help="Do not compute motif recall metrics.", + ) + report_parser.add_argument( + "-s", + "--no-seqlets", + action="store_true", + help="DEPRECATED: Please omit the `--modisco-h5` argument instead.", + ) + + collapse_hits_parser = subparsers.add_parser( + "collapse-hits", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + help="Identify best hit by motif similarity among sets of overlapping hits.", + ) + + collapse_hits_parser.add_argument( + "-i", + "--hits", + type=str, + required=True, + help="The `hits.tsv` or `hits_unique.tsv` file from `call-hits`.", + ) + collapse_hits_parser.add_argument( + "-o", + "--out-path", + type=str, + required=True, + help='The path to the output .tsv file with an additional "is_primary" column.', + ) + collapse_hits_parser.add_argument( + "-O", + "--overlap-frac", + type=float, + default=0.2, + help="The threshold for determining overlapping hits. 
For two hits with lengths x and y, the minimum overlap is defined as `overlap_frac * (x + y) / 2`. The default value of 0.2 means that two hits must overlap by at least 20%% of their average lengths to be considered overlapping.",
+    )
+
+    intersect_hits_parser = subparsers.add_parser(
+        "intersect-hits",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        help="Intersect hits across multiple runs.",
+    )
+
+    intersect_hits_parser.add_argument(
+        "-i",
+        "--hits",
+        type=str,
+        required=True,
+        nargs="+",
+        help="One or more hits.tsv or hits_unique.tsv files, with paths delimited by whitespace.",
+    )
+    intersect_hits_parser.add_argument(
+        "-o",
+        "--out-path",
+        type=str,
+        required=True,
+        help="The path to the output .tsv file. Duplicate columns are suffixed with the positional index of the input file.",
+    )
+    intersect_hits_parser.add_argument(
+        "-r",
+        "--relaxed",
+        action="store_true",
+        help="Use relaxed intersection criteria, using only motif names and untrimmed coordinates. By default, the intersection assumes consistent region definitions and motif trimming. This option is not recommended if genomic coordinates are unavailable.",
+    )
 
     args = parser.parse_args()
-    
+
     if args.cmd == "extract-regions-bw":
-        extract_regions_bw(args.peaks, args.chrom_order, args.fasta, args.bigwigs, args.out_path, args.region_width)
+        extract_regions_bw(
+            args.peaks,
+            args.chrom_order,
+            args.fasta,
+            args.bigwigs,
+            args.out_path,
+            args.region_width,
+        )
     elif args.cmd == "extract-regions-chrombpnet-h5":
-        extract_regions_chrombpnet_h5(args.peaks, args.chrom_order, args.h5s, args.out_path, args.region_width)
+        extract_regions_chrombpnet_h5(
+            args.peaks, args.chrom_order, args.h5s, args.out_path, args.region_width
+        )
     elif args.cmd == "extract-regions-h5":
-        print("WARNING: The `extract-regions-h5` command is deprecated. 
Use `extract-regions-chrombpnet-h5` instead.") - extract_regions_chrombpnet_h5(args.peaks, args.chrom_order, args.h5s, args.out_path, args.region_width) + print( + "WARNING: The `extract-regions-h5` command is deprecated. Use `extract-regions-chrombpnet-h5` instead." + ) + extract_regions_chrombpnet_h5( + args.peaks, args.chrom_order, args.h5s, args.out_path, args.region_width + ) elif args.cmd == "extract-regions-bpnet-h5": - extract_regions_bpnet_h5(args.peaks, args.chrom_order, args.h5s, args.out_path, args.region_width) + extract_regions_bpnet_h5( + args.peaks, args.chrom_order, args.h5s, args.out_path, args.region_width + ) elif args.cmd == "extract-regions-modisco-fmt": - extract_regions_modisco_fmt(args.peaks, args.chrom_order, args.attributions, args.sequences, args.out_path, args.region_width) - + extract_regions_modisco_fmt( + args.peaks, + args.chrom_order, + args.attributions, + args.sequences, + args.out_path, + args.region_width, + ) + elif args.cmd == "call-hits": if args.alpha is not None: - warnings.warn("The `--alpha` flag is deprecated and will be removed in a future version. Please use the `--global-lambda` flag instead.") + warnings.warn( + "The `--alpha` flag is deprecated and will be removed in a future version. Please use the `--global-lambda` flag instead." + ) args.global_lambda = args.alpha if args.motif_alphas is not None: - warnings.warn("The `--motif-alphas` flag is deprecated and will be removed in a future version. Please use the `--motif-lambdas` flag instead.") + warnings.warn( + "The `--motif-alphas` flag is deprecated and will be removed in a future version. Please use the `--motif-lambdas` flag instead." 
+ ) args.motif_lambdas = args.motif_alphas - call_hits(args.regions, args.peaks, args.modisco_h5, args.chrom_order, args.motifs_include, args.motif_names, - args.motif_lambdas, args.out_dir, args.cwm_trim_coords, args.cwm_trim_thresholds, args.cwm_trim_threshold, - args.global_lambda, args.step_size_max, args.step_size_min, args.sqrt_transform, args.convergence_tol, - args.max_steps, args.batch_size, args.step_adjust, args.device, args.mode, args.no_post_filter, args.compile) + call_hits( + args.regions, + args.peaks, + args.modisco_h5, + args.chrom_order, + args.motifs_include, + args.motif_names, + args.motif_lambdas, + args.out_dir, + args.cwm_trim_coords, + args.cwm_trim_thresholds, + args.cwm_trim_threshold, + args.global_lambda, + args.step_size_max, + args.step_size_min, + args.sqrt_transform, + args.convergence_tol, + args.max_steps, + args.batch_size, + args.step_adjust, + args.device, + args.mode, + args.no_post_filter, + args.compile, + ) elif args.cmd == "report": if args.no_recall and not args.no_seqlets: - raise ValueError("The `--no-seqlets` flag must be set in conjunction with `--no-recall`.") - - report(args.regions, args.hits, args.modisco_h5, args.peaks, args.motifs_include, - args.motif_names, args.out_dir, args.modisco_region_width, args.cwm_trim_threshold, - not args.no_recall, not args.no_seqlets) + raise ValueError( + "The `--no-seqlets` flag must be set in conjunction with `--no-recall`." 
+ ) + + report( + args.regions, + args.hits, + args.modisco_h5, + args.peaks, + args.motifs_include, + args.motif_names, + args.out_dir, + args.modisco_region_width, + args.cwm_trim_threshold, + not args.no_recall, + not args.no_seqlets, + ) elif args.cmd == "collapse-hits": collapse_hits(args.hits, args.out_path, args.overlap_frac) elif args.cmd == "intersect-hits": intersect_hits(args.hits, args.out_path, args.relaxed) - diff --git a/src/finemo/postprocessing.py b/src/finemo/postprocessing.py index 5663a4a..55c616b 100644 --- a/src/finemo/postprocessing.py +++ b/src/finemo/postprocessing.py @@ -1,19 +1,78 @@ +"""Post-processing utilities for Fi-NeMo hit calling results. + +This module provides functions for: +- Collapsing overlapping hits based on similarity scores +- Intersecting hit sets across multiple runs +- Quality control and filtering operations + +The main operations are optimized using Numba for efficient processing +of large hit datasets. +""" + import heapq +from typing import List, Union import numpy as np +from numpy import ndarray import polars as pl from numba import njit -from numba.types import Array, uint32, int32, float32 +from numba.types import Array, uint32, int32, float32 # type: ignore[attr-defined] +from jaxtyping import Float, Int + @njit( uint32[:]( - Array(uint32, 1, 'C', readonly=True), - Array(int32, 1, 'C', readonly=True), - Array(int32, 1, 'C', readonly=True), - Array(float32, 1, 'C', readonly=True) - ), cache=True + Array(uint32, 1, "C", readonly=True), + Array(int32, 1, "C", readonly=True), + Array(int32, 1, "C", readonly=True), + Array(float32, 1, "C", readonly=True), + ), + cache=True, ) -def _collapse_hits(chrom_ids, starts, ends, similarities): +def _collapse_hits( + chrom_ids: Int[ndarray, " N"], + starts: Int[ndarray, " N"], + ends: Int[ndarray, " N"], + similarities: Float[ndarray, " N"], +) -> Int[ndarray, " N"]: + """Identify primary hits among overlapping hits using a sweep line algorithm. 
+ + This function uses a heap-based sweep line algorithm to efficiently identify + the best hit (highest similarity) among sets of overlapping hits within each + chromosome. Only one hit per overlapping group is marked as primary. + + Parameters + ---------- + chrom_ids : Int[ndarray, "N"] + Chromosome identifiers for each hit, where N is the number of hits. + Dtype should be uint32 for Numba compatibility. + starts : Int[ndarray, "N"] + Start positions of hits (adjusted for overlap computation). + Dtype should be int32 for Numba compatibility. + ends : Int[ndarray, "N"] + End positions of hits (adjusted for overlap computation). + Dtype should be int32 for Numba compatibility. + similarities : Float[ndarray, "N"] + Similarity scores used for selecting the best hit. + Dtype should be float32 for Numba compatibility. + + Returns + ------- + Int[ndarray, "N"] + Binary array where 1 indicates the hit is primary, 0 otherwise. + Returns uint32 array for consistency with input types. + + Notes + ----- + This function is JIT-compiled with Numba for performance on large datasets. + The algorithm maintains active intervals in a heap and resolves overlaps + by keeping only the hit with the highest similarity score. + + The sweep line algorithm processes hits in order and maintains a heap of + currently active intervals. When a new interval is encountered, it is + compared against all overlapping intervals in the heap, and only the + interval with the highest similarity score remains marked as primary. 
+ """ n = chrom_ids.shape[0] out = np.ones(n, dtype=np.uint32) heap = [(np.uint32(0), np.int32(0), -1) for _ in range(0)] @@ -24,78 +83,216 @@ def _collapse_hits(chrom_ids, starts, ends, similarities): end_new = ends[i] sim_new = similarities[i] + # Remove expired intervals from heap while heap and heap[0] < (chrom_new, start_new, -1): heapq.heappop(heap) + # Check overlaps with active intervals for _, _, idx in heap: cmp = sim_new > similarities[idx] out[idx] &= cmp out[i] &= not cmp + # Add current interval to heap heapq.heappush(heap, (chrom_new, end_new, i)) return out -def collapse_hits(hits_df, overlap_frac): +def collapse_hits( + hits_df: Union[pl.DataFrame, pl.LazyFrame], overlap_frac: float +) -> pl.DataFrame: + """Collapse overlapping hits by selecting the best hit per overlapping group. + + This function identifies overlapping hits and marks only the highest-similarity + hit as primary in each overlapping group. Overlap is determined by a fractional + threshold based on the average length of the two hits being compared. + + Parameters + ---------- + hits_df : Union[pl.DataFrame, pl.LazyFrame] + Hit data containing required columns: chr (or peak_id if no chr), start, end, + hit_similarity. Will be collected to DataFrame if passed as LazyFrame. + overlap_frac : float + Overlap fraction threshold for considering hits as overlapping. + For two hits with lengths x and y, minimum overlap = overlap_frac * (x + y) / 2. + Must be between 0 and 1, where 0 means any overlap and 1 means complete overlap. + + Returns + ------- + pl.DataFrame + Original hit data with an additional 'is_primary' column (1 for primary hits, 0 otherwise). + All original columns are preserved, with the new column added at the end. + + Raises + ------ + KeyError + If required columns (chr/peak_id, start, end, hit_similarity) are missing. 
+ + Notes + ----- + The algorithm transforms coordinates by scaling by 2 and adjusting by the overlap + fraction to create effective overlap regions for efficient processing. This allows + using a sweep line algorithm to identify overlaps in a single pass. + + The transformation works as follows: + - Original coordinates: [start, end] + - Length = end - start + - Adjusted start = start * 2 + length * overlap_frac + - Adjusted end = end * 2 - length * overlap_frac + + This creates regions that overlap only when the original regions have sufficient + overlap according to the specified fraction. + + Examples + -------- + >>> hits_collapsed = collapse_hits(hits_df, overlap_frac=0.2) + >>> primary_hits = hits_collapsed.filter(pl.col("is_primary") == 1) + >>> print(f"Kept {primary_hits.height}/{hits_df.height} hits as primary") + """ + # Ensure we're working with a DataFrame + if isinstance(hits_df, pl.LazyFrame): + hits_df = hits_df.collect() + chroms = hits_df["chr"].unique(maintain_order=True) if not chroms.is_empty(): - chrom_to_id = { - chrom: i for i, chrom in enumerate(chroms) - } + chrom_to_id = {chrom: i for i, chrom in enumerate(chroms)} + # Transform coordinates for overlap computation + # Scale by 2 and adjust by overlap fraction to create effective overlap regions df = hits_df.select( chrom_id=pl.col("chr").replace_strict(chrom_to_id, return_dtype=pl.UInt32), - start_trim=pl.col("start") * 2 + ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), - end_trim=pl.col("end") * 2 - ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), - similarity=pl.col("hit_similarity") + start_trim=pl.col("start") * 2 + + ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), + end_trim=pl.col("end") * 2 + - ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), + similarity=pl.col("hit_similarity"), ) else: + # Fall back to peak_id when chr column is not available df = hits_df.select( chrom_id=pl.col("peak_id"), - 
start_trim=pl.col("start") * 2 + ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), - end_trim=pl.col("end") * 2 - ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), - similarity=pl.col("hit_similarity") + start_trim=pl.col("start") * 2 + + ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), + end_trim=pl.col("end") * 2 + - ((pl.col("end") - pl.col("start")) * overlap_frac).cast(pl.Int32), + similarity=pl.col("hit_similarity"), ) + # Rechunk for efficient array access df = df.rechunk() chrom_ids = df["chrom_id"].to_numpy(allow_copy=False) starts = df["start_trim"].to_numpy(allow_copy=False) ends = df["end_trim"].to_numpy(allow_copy=False) similarities = df["similarity"].to_numpy(allow_copy=False) + + # Run the collapse algorithm is_primary = _collapse_hits(chrom_ids, starts, ends, similarities) - df_out = hits_df.with_columns( - is_primary=pl.Series(is_primary, dtype=pl.UInt32) - ) + # Add primary indicator column to original DataFrame + df_out = hits_df.with_columns(is_primary=pl.Series(is_primary, dtype=pl.UInt32)) return df_out -def intersect_hits(hits_dfs, relaxed): +def intersect_hits( + hits_dfs: List[Union[pl.DataFrame, pl.LazyFrame]], relaxed: bool +) -> pl.DataFrame: + """Intersect hit datasets across multiple runs to find common hits. + + This function finds hits that appear consistently across multiple Fi-NeMo + runs, which can be useful for identifying robust motif instances that are + not sensitive to parameter variations or random initialization. + + Parameters + ---------- + hits_dfs : List[Union[pl.DataFrame, pl.LazyFrame]] + List of hit DataFrames from different Fi-NeMo runs. Each DataFrame must + contain the columns specified by the intersection criteria. LazyFrames + will be collected before processing. + relaxed : bool + If True, uses relaxed intersection criteria with only motif names and + untrimmed coordinates. If False, uses strict criteria including all + coordinate and metadata columns. 
+ + Returns + ------- + pl.DataFrame + DataFrame containing hits that appear in all input datasets. + Columns from later datasets are suffixed with their index (e.g., '_1', '_2'). + The first dataset's columns retain their original names. + + Raises + ------ + ValueError + If fewer than one hits DataFrame is provided. + KeyError + If required columns for the specified intersection criteria are missing + from any of the input DataFrames. + + Notes + ----- + Relaxed intersection is useful when comparing results across different + region definitions or motif trimming parameters, but may produce less + precise matches. Strict intersection requires identical region definitions + and is recommended for most use cases. + + The intersection columns used are: + - Relaxed: ["chr", "start_untrimmed", "end_untrimmed", "motif_name", "strand"] + - Strict: ["chr", "start", "end", "start_untrimmed", "end_untrimmed", + "motif_name", "strand", "peak_name", "peak_id"] + + The function performs successive inner joins starting with the first DataFrame, + so the final result contains only hits present in all input datasets. 
+ + Examples + -------- + >>> common_hits = intersect_hits([hits_df1, hits_df2], relaxed=False) + >>> print(f"Found {common_hits.height} hits common to both runs") + >>> + >>> # Compare relaxed vs strict intersection + >>> relaxed_hits = intersect_hits([hits_df1, hits_df2], relaxed=True) + >>> strict_hits = intersect_hits([hits_df1, hits_df2], relaxed=False) + >>> print(f"Relaxed: {relaxed_hits.height}, Strict: {strict_hits.height}") + """ if relaxed: - join_cols = [ - "chr", "start_untrimmed", "end_untrimmed", - "motif_name", "strand" - ] + # Relaxed criteria: only motif identity and untrimmed positions + join_cols = ["chr", "start_untrimmed", "end_untrimmed", "motif_name", "strand"] else: + # Strict criteria: all coordinate and metadata columns join_cols = [ - "chr", "start", "end", "start_untrimmed", "end_untrimmed", - "motif_name", "strand", "peak_name", "peak_id" + "chr", + "start", + "end", + "start_untrimmed", + "end_untrimmed", + "motif_name", + "strand", + "peak_name", + "peak_id", ] if len(hits_dfs) < 1: raise ValueError("At least one hits dataframe required") - hits_df = hits_dfs[0] - for i in range(1, len(hits_dfs)): + # Ensure all DataFrames are collected + collected_dfs = [] + for df in hits_dfs: + if isinstance(df, pl.LazyFrame): + collected_dfs.append(df.collect()) + else: + collected_dfs.append(df) + + # Start with first DataFrame and successively intersect with others + hits_df = collected_dfs[0] + for i in range(1, len(collected_dfs)): hits_df = hits_df.join( - hits_dfs[i], + collected_dfs[i], on=join_cols, how="inner", suffix=f"_{i}", join_nulls=True, - coalesce=True + coalesce=True, ) - return hits_df \ No newline at end of file + return hits_df diff --git a/src/finemo/templates/report.html b/src/finemo/templates/report.html index 8465ead..a04417b 100644 --- a/src/finemo/templates/report.html +++ b/src/finemo/templates/report.html @@ -178,15 +178,24 @@ -

Fi-NeMo hit calling report

+

Fi-NeMo Motif Hit Calling Report

+ +

+ This report provides a comprehensive analysis of motif instance calling results from Fi-NeMo, + a GPU-accelerated method for identifying transcription factor binding sites using neural network + contribution scores. Fi-NeMo uses a competitive optimization approach to comprehensively map motif + instances by solving a sparse linear reconstruction problem. The report compares Fi-NeMo hits + with TF-MoDISCo seqlets (when available) and provides detailed statistics on hit quality and + motif discovery performance. +

{% if not use_seqlets %}
- Seqlet comparisons are not shown because a TF-MoDISco H5 file with seqlet data is not provided. + Note: Seqlet comparisons are not shown because a TF-MoDISco H5 file with seqlet data was not provided.
{% elif not compute_recall %}
- Seqlet recall and other statistics directly comparing hits and seqlets are not computed because the -n/--no-recall argument is set. + Note: Seqlet recall and other statistics directly comparing hits and seqlets are not computed because the -n/--no-recall argument was specified.
{% endif %} @@ -198,66 +207,69 @@

TF-MoDISco seqlet comparisons

Hit vs. seqlet counts

- This figure shows the number of hits called vs. the number of TF-MoDISco seqlets identified for each motif. - The dashed line is the identity line. - When comparing a shared set of regions, the hit counts should be mostly greater than the corresponding seqlet counts, since TF-MoDISco stringently filters seqlets and usually uses a smaller input window. + This scatter plot compares the number of motif instances called by Fi-NeMo versus the number of TF-MoDISCo seqlets + identified for each motif. The dashed line represents perfect agreement (y = x). Fi-NeMo typically identifies + an order of magnitude more motif instances than TF-MoDISCo because: (1) TF-MoDISCo applies stringent filtering criteria + during seqlet identification, and (2) TF-MoDISCo often analyzes smaller genomic windows than those used for hit calling.

{% endif %} -

Hit and seqlet motif comparisons

+

Motif-specific hit and seqlet analysis

- For each motif, this table examines the consistency between hits and TF-MoDISco seqlets. + This table provides detailed statistics for each motif, comparing the consistency between Fi-NeMo hits + and TF-MoDISCo seqlets. The analysis includes hit counts, overlap statistics, and visual comparisons + of contribution weight matrices (CWMs).

- The following statistics report the number of hits, seqlets, and their relationships: + Statistical measures include:

- Note that the seqlet counts here may be lower than those shown in the tfmodisco-lite report due to double-counting in overlapping regions. - The seqlet counts shown here are unique while the counts in the tfmodisco-lite report are not de-duplicated. -

-{% if compute_recall %} -

- Note that palindromic motifs may have lower recall due to disagreements on orientation. - If seqlet recall is near zero for all motifs, the -W/--modisco-region-width argument is likely incorrect. - This value is required to infer genomic coordinates of seqlets from the tfmodisco-lite output H5. -

-{% endif %} -

- Motif CWMs (contribution weight matrices) are average contribution scores over a set of regions. The CWMs plotted here are: + Important notes:

- The plots span the full untrimmed motif, with the trimmed motif shaded. + Contribution Weight Matrix (CWM) visualizations:
+ CWMs represent average contribution scores across motif instances and show the functional importance + of each nucleotide position. The following CWMs are displayed for comparison:

+

- The hit-seqlet similarity is the cosine similarity between the additional-restricted-hits CWM and the seqlet CWM. - This statistic measures the similarity between hits that were missed by TF-MoDISco and the seqlets used to construct the motif. + All CWM plots span the full untrimmed motif width, with the core trimmed region highlighted by shading. + {% if compute_recall %} + The hit-seqlet CWM similarity quantifies the overall agreement between Fi-NeMo's discovered instances + and TF-MoDISco's original motif definitions. + {% endif %}

@@ -325,38 +337,44 @@

Hit and seqlet motif comparisons

{% if compute_recall %} -

Seqlet-hit confusion matrix

+

Motif cross-assignment analysis

+

+ This confusion matrix identifies cases where Fi-NeMo hits of one motif type spatially overlap with + TF-MoDISco seqlets of different motif types. Such cross-assignments can reveal related motif families, + algorithm differences, or cases where similar-looking motifs compete for the same binding sites. +

- This heatmap shows the prevalence of motifs whose (untrimmed) hits overlap with TF-MoDISco seqlets of other motifs. - The vertical axis shows the motif of the seqlet, while the horizontal axis shows the motif of the hit. - The color intensity here represents an estimator of the expected number of bases of hit overlap per base of seqlet. + The y-axis represents seqlet motif identity, the x-axis represents hit motif identity, and color intensity + indicates the estimated overlap frequency per base of seqlet sequence. High off-diagonal values suggest + potential motif ambiguity and/or algorithmic disagreements at groups of putative TF binding sites.

{% endif %} -

Hit statistic distributions

+

Hit Quality and Distribution Analysis

- The following figures visualize the distribution of hit statistics across motifs and regions. + These visualizations examine the quality and distribution of Fi-NeMo hits across genomic regions and motifs, + measuring algorithm performance and signal strength.

-

Overall distribution of hit counts per region

+

Genome-wide hit density

- This plot shows the distribution of hit counts per region for any motif. - The number of regions with no hits should be near zero. + This histogram shows the distribution of total hit counts per genomic region (across all motifs). + A good distribution should show nearly all regions containing at least one hit.

-

Per-motif distributions of hit statistics

+

Motif-specific hit quality metrics

- These plots show the distribution of hit statistics for each motif, specifically: -

+ These distribution plots characterize the quality and prevalence of hits for individual motifs:

+
@@ -380,11 +398,16 @@

Per-motif distributions of hit statistics

-

Motif co-occurrence

+

Motif co-occurrence analysis

+

+ This correlation heatmap reveals which motifs tend to occur together in the same genomic regions, + potentially indicating cooperative transcription factor binding or shared regulatory mechanisms. + Color intensity represents cosine similarity between motif occurrence patterns, where occurrence + is defined as the presence of at least one hit for each motif within individual regions. +

- This heatmap shows the co-occurrence of motifs across regions. - The color intensity here represents the cosine similarity between the motifs' occurrence across regions, - where occurence is defined as the presence of a hit for a motif in a region. + High positive correlations (dark colors) suggest motifs that frequently co-occur. + Low correlations suggest independent or mutually exclusive binding patterns.

diff --git a/src/finemo/visualization.py b/src/finemo/visualization.py index fd348a1..6346857 100644 --- a/src/finemo/visualization.py +++ b/src/finemo/visualization.py @@ -1,18 +1,57 @@ +"""Visualization module for generating plots and reports for Fi-NeMo results. + +This module provides functions for: +- Plotting motif contribution weight matrices (CWMs) as sequence logos +- Generating distribution plots for hit statistics +- Creating co-occurrence heatmaps +- Producing HTML reports with interactive visualizations +- Plotting confusion matrices and performance metrics +""" + import os -import importlib +import importlib.resources +from typing import List, Optional, Dict, Any, Tuple, Union, Mapping, Iterable import numpy as np +from numpy import ndarray import matplotlib.pyplot as plt +from matplotlib.axes import Axes from matplotlib.patheffects import AbstractPathEffect from matplotlib.textpath import TextPath from matplotlib.transforms import Affine2D from matplotlib.font_manager import FontProperties from jinja2 import Template +import polars as pl +from jaxtyping import Float, Int from . import templates -def abbreviate_motif_name(name): +def abbreviate_motif_name(name: str) -> str: + """Convert TF-MoDISCo motif names to abbreviated format. + + Converts full TF-MoDISCo pattern names to shorter, more readable format + for display in plots and reports. + + Parameters + ---------- + name : str + Full motif name (e.g., 'pos_patterns.pattern_0'). + + Returns + ------- + str + Abbreviated name (e.g., '+/0') or original name if parsing fails. 
+ + Examples + -------- + >>> abbreviate_motif_name('pos_patterns.pattern_0') + '+/0' + >>> abbreviate_motif_name('neg_patterns.pattern_1') + '-/1' + >>> abbreviate_motif_name('invalid_name') + 'invalid_name' + """ try: group, motif = name.split(".") if group == "pos_patterns": @@ -23,14 +62,44 @@ def abbreviate_motif_name(name): raise Exception motif_num = motif.split("_")[1] return f"{group_short}/{motif_num}" - except: + except Exception: return name -def plot_hit_stat_distributions(hits_df, motif_names, plot_dir): - hits_df = hits_df.collect() - hits_by_motif = hits_df.partition_by("motif_name", as_dict=True) - dummy_df = hits_df.clear() +def plot_hit_stat_distributions( + hits_df: pl.LazyFrame, motif_names: List[str], plot_dir: str +) -> None: + """Plot distributions of hit statistics for each motif. + + Creates separate histogram plots for coefficient, similarity, and importance + score distributions for each motif. Saves plots in both PNG (high-res) and + SVG (vector) formats. + + Parameters + ---------- + hits_df : pl.LazyFrame + Lazy DataFrame containing hit data with required columns: + - motif_name : str, name of the motif + - hit_coefficient_global : float, global coefficient values + - hit_similarity : float, similarity scores to motif CWM + - hit_importance : float, importance scores from attribution + motif_names : List[str] + List of motif names to generate plots for. Motifs not present + in hits_df will result in empty histograms. + plot_dir : str + Directory path where plots will be saved. Creates subdirectory + 'motif_stat_distributions' if it doesn't exist. 
+ + Notes + ----- + For each motif, creates three separate plots: + - {motif_name}_coefficients.{png,svg} : coefficient distribution + - {motif_name}_similarities.{png,svg} : similarity distribution + - {motif_name}_importances.{png,svg} : importance distribution + """ + hits_df_collected = hits_df.collect() + hits_by_motif = hits_df_collected.partition_by("motif_name", as_dict=True) + dummy_df = hits_df_collected.clear() motifs_dir = os.path.join(plot_dir, "motif_stat_distributions") os.makedirs(motifs_dir, exist_ok=True) @@ -42,6 +111,7 @@ def plot_hit_stat_distributions(hits_df, motif_names, plot_dir): fig, ax = plt.subplots(figsize=(5, 2)) + # Plot coefficient distribution ax.hist(coefficients, bins=50, density=True) output_path_png = os.path.join(motifs_dir, f"{m}_coefficients.png") @@ -52,6 +122,7 @@ def plot_hit_stat_distributions(hits_df, motif_names, plot_dir): fig, ax = plt.subplots(figsize=(5, 2)) + # Plot similarity distribution ax.hist(similarities, bins=50, density=True) output_path_png = os.path.join(motifs_dir, f"{m}_similarities.png") @@ -62,6 +133,7 @@ def plot_hit_stat_distributions(hits_df, motif_names, plot_dir): fig, ax = plt.subplots(figsize=(5, 2)) + # Plot importance distribution ax.hist(importances, bins=50, density=True) output_path_png = os.path.join(motifs_dir, f"{m}_importances.png") @@ -71,7 +143,34 @@ def plot_hit_stat_distributions(hits_df, motif_names, plot_dir): plt.close(fig) -def plot_hit_peak_distributions(occ_df, motif_names, plot_dir): +def plot_hit_peak_distributions( + occ_df: pl.DataFrame, motif_names: List[str], plot_dir: str +) -> None: + """Plot distribution of hits per peak for each motif. + + Creates bar plots showing the frequency distribution of hit counts per peak + for each motif, plus an overall distribution of total hits per peak. + + Parameters + ---------- + occ_df : pl.DataFrame + DataFrame containing motif occurrence counts per peak. 
Expected to have: + - One column per motif name with integer hit counts + - 'total' column with sum of all motif hits per peak + - Each row represents a peak/genomic region + motif_names : List[str] + List of motif names corresponding to columns in occ_df. + plot_dir : str + Directory to save plots. Creates 'motif_hit_distributions' subdirectory. + + Notes + ----- + Generates the following plots: + - Individual motif hit distributions: {motif_name}.{png,svg} + - Overall hit distribution: total_hit_distribution.{png,svg} + + Bar plots show frequency (proportion) on y-axis and hit count on x-axis. + """ motifs_dir = os.path.join(plot_dir, "motif_hit_distributions") os.makedirs(motifs_dir, exist_ok=True) @@ -92,7 +191,7 @@ def plot_hit_peak_distributions(occ_df, motif_names, plot_dir): plt.savefig(output_path_svg) plt.close(fig) - + fig, ax = plt.subplots(figsize=(8, 4)) unique, counts = np.unique(occ_df.get_column("total"), return_counts=True) @@ -114,16 +213,41 @@ def plot_hit_peak_distributions(occ_df, motif_names, plot_dir): plt.close(fig) -def plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_dir): - """ - Plots a simple indicator heatmap of the motifs in each peak. +def plot_peak_motif_indicator_heatmap( + peak_hit_counts: Int[ndarray, "M M"], motif_names: List[str], output_dir: str +) -> None: + """Plot co-occurrence heatmap showing motif associations across peaks. + + Creates a normalized correlation heatmap showing how frequently pairs of + motifs co-occur within the same genomic peaks. Values are normalized by + the geometric mean of individual motif frequencies. + + Parameters + ---------- + peak_hit_counts : Int[ndarray, "M M"] + Co-occurrence matrix where M = len(motif_names). + Entry (i,j) represents the number of peaks containing both motif i and j. + Diagonal entries represent total peaks containing each individual motif. + motif_names : List[str] + List of motif names for axis labels. Order must match matrix dimensions. 
+ output_dir : str + Directory path where the heatmap plots will be saved. + + Notes + ----- + Saves plots as: + - motif_cooocurrence.png : High-resolution raster format + - motif_cooocurrence.svg : Vector format + + The heatmap uses correlation normalization: matrix[i,j] / sqrt(matrix[i,i] * matrix[j,j]) + Colors use the 'Greens' colormap with values typically in [0, 1] range. """ cov_norm = 1 / np.sqrt(np.diag(peak_hit_counts)) matrix = peak_hit_counts * cov_norm[:, None] * cov_norm[None, :] motif_keys = [abbreviate_motif_name(m) for m in motif_names] - fig, ax = plt.subplots(figsize=(8, 8), layout='constrained') - + fig, ax = plt.subplots(figsize=(8, 8), layout="constrained") + # Plot the heatmap cax = ax.imshow(matrix, interpolation="nearest", aspect="equal", cmap="Greens") @@ -135,26 +259,55 @@ def plot_peak_motif_indicator_heatmap(peak_hit_counts, motif_names, output_dir): ax.set_xlabel("Motif i") ax.set_ylabel("Motif j") - ax.tick_params(axis='both', labelsize=8) + ax.tick_params(axis="both", labelsize=8) cbar = fig.colorbar(cax, ax=ax, orientation="vertical", shrink=0.6, aspect=30) - cbar.ax.tick_params(labelsize=8) - + cbar.ax.tick_params(labelsize=8) + output_path_png = os.path.join(output_dir, "motif_cooocurrence.png") plt.savefig(output_path_png, dpi=300) output_path_svg = os.path.join(output_dir, "motif_cooocurrence.svg") - plt.savefig(output_path_svg) + plt.savefig(output_path_svg, dpi=300) plt.close() -def plot_seqlet_confusion_heatmap(seqlet_confusion, motif_names, output_dir): +def plot_seqlet_confusion_heatmap( + seqlet_confusion: Int[ndarray, "M M"], motif_names: List[str], output_dir: str +) -> None: + """Plot confusion matrix heatmap comparing seqlets to hit calls. + + Creates a heatmap showing the overlap between TF-MoDISCo seqlets and + Fi-NeMo hit calls. Rows represent seqlet motifs, columns represent hit motifs. + + Parameters + ---------- + seqlet_confusion : Int[ndarray, "M M"] + Confusion matrix where M = len(motif_names). 
+ Entry (i,j) represents the number of seqlets of motif i that overlap + with hits called for motif j. + motif_names : List[str] + List of motif names for axis labels. Order must match matrix dimensions. + output_dir : str + Directory path where the confusion matrix plots will be saved. + + Notes + ----- + Saves plots as: + - seqlet_confusion.png : High-resolution raster format + - seqlet_confusion.svg : Vector format + + The heatmap uses 'Blues' colormap. Perfect agreement would show a diagonal + pattern with high values along the diagonal and low off-diagonal values. + """ motif_keys = [abbreviate_motif_name(m) for m in motif_names] - fig, ax = plt.subplots(figsize=(8, 8), layout='constrained') - + fig, ax = plt.subplots(figsize=(8, 8), layout="constrained") + # Plot the heatmap - cax = ax.imshow(seqlet_confusion, interpolation="nearest", aspect="equal", cmap="Blues") + cax = ax.imshow( + seqlet_confusion, interpolation="nearest", aspect="equal", cmap="Blues" + ) # Set axes on heatmap ax.set_yticks(np.arange(len(motif_keys))) @@ -164,23 +317,47 @@ def plot_seqlet_confusion_heatmap(seqlet_confusion, motif_names, output_dir): ax.set_xlabel("Hit motif") ax.set_ylabel("Seqlet motif") - ax.tick_params(axis='both', labelsize=8) + ax.tick_params(axis="both", labelsize=8) cbar = fig.colorbar(cax, ax=ax, orientation="vertical", shrink=0.6, aspect=30) - cbar.ax.tick_params(labelsize=8) + cbar.ax.tick_params(labelsize=8) output_path_png = os.path.join(output_dir, "seqlet_confusion.png") plt.savefig(output_path_png, dpi=300) output_path_svg = os.path.join(output_dir, "seqlet_confusion.svg") - plt.savefig(output_path_svg) + plt.savefig(output_path_svg, dpi=300) plt.close() class LogoGlyph(AbstractPathEffect): - def __init__(self, glyph, ref_glyph='E', font_props=None, - offset=(0., 0.), **kwargs): + """Path effect for creating sequence logo glyphs with normalized dimensions. 
+ + This class creates properly scaled and positioned text glyphs for sequence + logos by normalizing character dimensions and applying appropriate transforms. + + Parameters + ---------- + glyph : str + Single character to render (e.g., 'A', 'C', 'G', 'T'). + ref_glyph : str, default 'E' + Reference character used for width normalization. + font_props : FontProperties, optional + Font properties for the glyph rendering. + offset : Tuple[float, float], default (0., 0.) + Offset for glyph positioning. + **kwargs + Additional graphics collection parameters. + """ + def __init__( + self, + glyph: str, + ref_glyph: str = "E", + font_props: Optional[FontProperties] = None, + offset: Tuple[float, float] = (0.0, 0.0), + **kwargs, + ) -> None: super().__init__(offset) path_orig = TextPath((0, 0), glyph, size=1, prop=font_props) @@ -205,16 +382,81 @@ def __init__(self, glyph, ref_glyph='E', font_props=None, #: The dictionary of keywords to update the graphics collection with. self._gc = kwargs - def draw_path(self, renderer, gc, tpath, affine, rgbFace): + def draw_path(self, renderer, gc, tpath, affine, rgbFace) -> Any: # type: ignore[override] + """Draw the glyph path using the renderer. + + Parameters + ---------- + renderer : matplotlib renderer + The renderer to draw with. + gc : GraphicsContext + Graphics context for drawing properties. + tpath : Path + Original text path (unused, using self.path instead). + affine : Transform + Affine transformation to apply. + rgbFace : color + Face color for the glyph. + + Returns + ------- + Any + Result from renderer.draw_path. 
+ """ return renderer.draw_path(gc, self.path, affine, rgbFace) -def plot_logo(ax, heights, glyphs, colors=None, font_props=None, shade_bounds=None): +def plot_logo( + ax: Axes, + heights: Float[ndarray, "B W"], + glyphs: Iterable[str], + colors: Optional[Mapping[str, Optional[str]]] = None, + font_props: Optional[FontProperties] = None, + shade_bounds: Optional[Tuple[int, int]] = None, +) -> None: + """Plot sequence logo from contribution weight matrix. + + Creates a sequence logo visualization where letter heights represent + the contribution or information content at each position. Supports + both positive and negative contributions with proper stacking. + + Parameters + ---------- + ax : Axes + Matplotlib axes object to plot on. + heights : Float[ndarray, "B W"] + Height matrix where B = len(glyphs) and W = motif width. + Entry (i,j) represents the height/contribution of base i at position j. + Can contain both positive and negative values. + glyphs : Iterable[str] + Sequence of base characters corresponding to rows in heights matrix. + Typically ['A', 'C', 'G', 'T'] for DNA. + colors : Dict[str, str], optional + Color mapping for each base. Keys should match glyphs. + If None, all bases will use default matplotlib colors. + font_props : FontProperties, optional + Font properties for letter rendering. If None, uses default font. + shade_bounds : Tuple[int, int], optional + (start, end) position indices to shade in background. + Useful for highlighting core motif regions. 
+ + Notes + ----- + Positive and negative contributions are handled separately: + - Positive values are stacked above zero line in order of descending absolute value + - Negative values are stacked below zero line in order of descending absolute value + - A horizontal line is drawn at y=0 for reference + + The resulting plot has: + - X-axis: Position in motif (0-indexed) + - Y-axis: Contribution magnitude + - Bar width: 0.95 (small gaps between positions) + """ if colors is None: colors = {g: None for g in glyphs} ax.margins(x=0, y=0) - + pos_values = np.clip(heights, 0, None) neg_values = np.clip(heights, None, 0) pos_order = np.argsort(pos_values, axis=0) @@ -222,47 +464,106 @@ def plot_logo(ax, heights, glyphs, colors=None, font_props=None, shade_bounds=No pos_reorder = np.argsort(pos_order, axis=0) neg_reorder = np.argsort(neg_order, axis=0) pos_offsets = np.take_along_axis( - np.cumsum( - np.take_along_axis(pos_values, pos_order, axis=0), axis=0 - ), pos_reorder, axis=0 + np.cumsum(np.take_along_axis(pos_values, pos_order, axis=0), axis=0), + pos_reorder, + axis=0, ) neg_offsets = np.take_along_axis( - np.cumsum( - np.take_along_axis(neg_values, neg_order, axis=0), axis=0 - ), neg_reorder, axis=0 + np.cumsum(np.take_along_axis(neg_values, neg_order, axis=0), axis=0), + neg_reorder, + axis=0, ) bottoms = pos_offsets + neg_offsets - heights x = np.arange(heights.shape[1]) for glyph, height, bottom in zip(glyphs, heights, bottoms): - ax.bar(x, height, 0.95, bottom=bottom, - path_effects=[LogoGlyph(glyph, font_props=font_props)], color=colors[glyph]) + ax.bar( + x, + height, + 0.95, + bottom=bottom, + path_effects=[LogoGlyph(glyph, font_props=font_props)], + color=colors[glyph], + ) if shade_bounds is not None: start, end = shade_bounds - ax.axvspan(start - 0.5, end - 0.5, color='0.9', zorder=-1) + ax.axvspan(start - 0.5, end - 0.5, color="0.9", zorder=-1) - ax.axhline(zorder=-1, linewidth=0.5, color='black') + ax.axhline(zorder=-1, linewidth=0.5, color="black") 
-LOGO_ALPHABET = 'ACGT' -LOGO_COLORS = {"A": '#109648', "C": '#255C99', "G": '#F7B32B', "T": '#D62839'} +LOGO_ALPHABET = "ACGT" +LOGO_COLORS = {"A": "#109648", "C": "#255C99", "G": "#F7B32B", "T": "#D62839"} LOGO_FONT = FontProperties(weight="bold") -def plot_cwms(cwms, trim_bounds, out_dir, alphabet=LOGO_ALPHABET, colors=LOGO_COLORS, font=LOGO_FONT): +def plot_cwms( + cwms: Dict[str, Dict[str, Float[ndarray, "4 W"]]], + trim_bounds: Dict[str, Dict[str, Tuple[int, int]]], + out_dir: str, + alphabet: str = LOGO_ALPHABET, + colors: Dict[str, str] = LOGO_COLORS, + font: FontProperties = LOGO_FONT, +) -> None: + """Plot contribution weight matrices as sequence logos. + + Creates sequence logo plots for all motifs and CWM types, with optional + shading to highlight trimmed regions. Saves plots in both PNG and SVG formats. + + Parameters + ---------- + cwms : Dict[str, Dict[str, Float[ndarray, "4 W"]]] + Nested dictionary structure: {motif_name: {cwm_type: cwm_array}}. + Each cwm_array has shape (4, W) where W is motif width. + Rows correspond to bases in alphabet order. + trim_bounds : Dict[str, Dict[str, Tuple[int, int]]] + Nested dictionary: {motif_name: {cwm_type: (start, end)}}. + Defines regions to shade in the sequence logos. + out_dir : str + Output directory where motif subdirectories will be created. + alphabet : str, default LOGO_ALPHABET + DNA alphabet string, typically 'ACGT'. + colors : Dict[str, str], default LOGO_COLORS + Color mapping for DNA bases. Keys should match alphabet characters. + font : FontProperties, default LOGO_FONT + Font properties for sequence logo rendering. + + Notes + ----- + Directory structure created: + ``` + out_dir/ + ├── motif1/ + │ ├── cwm_type1.png + │ ├── cwm_type1.svg + │ └── ... + └── motif2/ + └── ... + ``` + + Each plot is 10x2 inches with trimmed regions shaded if specified. + Spines (plot borders) are hidden for cleaner appearance. 
+ """ for m, v in cwms.items(): motif_dir = os.path.join(out_dir, m) os.makedirs(motif_dir, exist_ok=True) for cwm_type, cwm in v.items(): fig, ax = plt.subplots(figsize=(10, 2)) - plot_logo(ax, cwm, alphabet, colors=colors, font_props=font, shade_bounds=trim_bounds[m][cwm_type]) + plot_logo( + ax, + cwm, + alphabet, + colors=colors, + font_props=font, + shade_bounds=trim_bounds[m][cwm_type], + ) for name, spine in ax.spines.items(): spine.set_visible(False) - + output_path_png = os.path.join(motif_dir, f"{cwm_type}.png") plt.savefig(output_path_png, dpi=100) output_path_svg = os.path.join(motif_dir, f"{cwm_type}.svg") @@ -271,7 +572,36 @@ def plot_cwms(cwms, trim_bounds, out_dir, alphabet=LOGO_ALPHABET, colors=LOGO_CO plt.close(fig) -def plot_hit_vs_seqlet_counts(recall_data, output_dir): +def plot_hit_vs_seqlet_counts( + recall_data: Dict[str, Dict[str, Union[int, float]]], output_dir: str +) -> None: + """Plot scatter plot comparing hit counts to seqlet counts per motif. + + Creates a log-log scatter plot showing the relationship between the number + of hits called by Fi-NeMo and the number of seqlets identified by TF-MoDISCo + for each motif. Includes diagonal reference line and motif annotations. + + Parameters + ---------- + recall_data : Dict[str, Dict[str, Union[int, float]]] + Dictionary with motif names as keys and metrics dictionaries as values. + Each metrics dictionary must contain: + - 'num_hits_total' : int, total number of hits for the motif + - 'num_seqlets' : int, total number of seqlets for the motif + output_dir : str + Directory path where the scatter plot will be saved. 
+ + Notes + ----- + Saves plots as: + - hit_vs_seqlet_counts.png : High-resolution raster format + - hit_vs_seqlet_counts.svg : Vector format + + Plot features: + - Log-log scale on both axes + - Diagonal reference line (y = x) as dashed line + - Points annotated with abbreviated motif names + """ x = [] y = [] m = [] @@ -282,15 +612,15 @@ def plot_hit_vs_seqlet_counts(recall_data, output_dir): lim = max(np.amax(x), np.amax(y)) - fig, ax = plt.subplots(figsize=(8, 8), layout='constrained') + fig, ax = plt.subplots(figsize=(8, 8), layout="constrained") ax.axline((0, 0), (lim, lim), color="0.3", linewidth=0.7, linestyle=(0, (5, 5))) ax.scatter(x, y, s=5) for i, txt in enumerate(m): short = abbreviate_motif_name(txt) ax.annotate(short, (x[i], y[i]), fontsize=8, weight="bold") - ax.set_yscale('log') - ax.set_xscale('log') + ax.set_yscale("log") + ax.set_xscale("log") ax.set_xlabel("Hits per motif") ax.set_ylabel("Seqlets per motif") @@ -303,11 +633,59 @@ def plot_hit_vs_seqlet_counts(recall_data, output_dir): plt.close() -def write_report(report_df, motif_names, out_path, compute_recall, use_seqlets): - template_str = importlib.resources.files(templates).joinpath('report.html').read_text() +def write_report( + report_df: pl.DataFrame, + motif_names: List[str], + out_path: str, + compute_recall: bool, + use_seqlets: bool, +) -> None: + """Generate and write HTML report from motif analysis results. + + Creates a comprehensive HTML report with tables and visualizations + summarizing the Fi-NeMo motif discovery and hit calling results. + + Parameters + ---------- + report_df : pl.DataFrame + DataFrame containing motif statistics and performance metrics. + Expected columns depend on compute_recall and use_seqlets flags. + motif_names : List[str] + List of motif names to include in the report. + Order determines presentation sequence in the report. + out_path : str + File path where the HTML report will be written. + Parent directory must exist. 
+ compute_recall : bool + Whether recall metrics were computed and should be included + in the report template. + use_seqlets : bool + Whether TF-MoDISCo seqlet data was used in the analysis + and should be referenced in the report. + + Notes + ----- + Uses Jinja2 templating with the report.html template from the + templates package. The template receives: + - report_data: Iterator of DataFrame rows as named tuples + - motif_names: List of motif names + - compute_recall: Boolean flag for recall metrics + - use_seqlets: Boolean flag for seqlet usage + + Raises + ------ + OSError + If the output path cannot be written. + """ + template_str = ( + importlib.resources.files(templates).joinpath("report.html").read_text() + ) template = Template(template_str) - report = template.render(report_data=report_df.iter_rows(named=True), - motif_names=motif_names, compute_recall=compute_recall, - use_seqlets=use_seqlets) + report = template.render( + report_data=report_df.iter_rows(named=True), + motif_names=motif_names, + compute_recall=compute_recall, + use_seqlets=use_seqlets, + ) with open(out_path, "w") as f: - f.write(report) \ No newline at end of file + f.write(report) From e9cd90f4eaa81edb815282e4085a13a00a21f2b0 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 31 Aug 2025 13:51:08 -0700 Subject: [PATCH 15/39] Methods diagram --- README.md | 8 ++++---- assets/methods.svg | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 assets/methods.svg diff --git a/README.md b/README.md index 9231afd..c2c9bbc 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ## Overview -Fi-NeMo implements a competitive optimization approach using proximal gradient descent to identify motif instances by solving a sparse linear reconstruction problem. 
Unlike traditional sequence-based methods, Fi-NeMo leverages context-aware importance scores from deep neural networks to accurately map transcription factor binding sites, enabling the discovery of both high-confidence canonical motifs and low-prevalence cofactor motifs that are often missed by conventional approaches. +Fi-NeMo implements a competitive optimization approach using proximal gradient descent to identify motif instances by solving a sparse linear reconstruction problem. Unlike traditional sequence-based methods, Fi-NeMo leverages context-aware importance scores from deep neural networks to comprehensively map transcription factor binding sites, enabling the identification of both high-confidence canonical motifs and low-prevalence cofactor motifs that are often missed by conventional approaches. The algorithm represents contribution scores as weighted combinations of motif contribution weight matrices (CWMs) at specific genomic positions. This competitive assignment process more closely reflects the biological reality of transcription factors competing for binding sites, resulting in superior sensitivity and specificity compared to sequence-only methods. @@ -18,9 +18,9 @@ The algorithm represents contribution scores as weighted combinations of motif c ## Method -Fi-NeMo solves motif instance calling as an optimization problem that reconstructs contribution score tracks as sparse linear combinations of motif CWMs. The algorithm formulates this as an L1-regularized linear model. +Fi-NeMo solves motif instance calling as an optimization problem that reconstructs contribution score tracks as sparse linear combinations of motif CWMs, formulated as an L1-regularized linear model. This competitive assignment encourages overlapping motif instances to be resolved in a meaningful way, with stronger matches receiving higher coefficients while weaker or redundant matches are suppressed. 
-This competitive assignment encourages overlapping motif instances to be resolved in a meaningful way, with stronger matches receiving higher coefficients while weaker or redundant matches are suppressed. +![Methods diagram](/assets/methods.svg | width=100) ## References @@ -92,7 +92,7 @@ Recommended: Fi-NeMo provides a command-line utility named `finemo` for motif instance calling and analysis. The typical workflow involves three main steps: -1. **Preprocessing**: Transform input contributions and sequences into a compressed format +1. **Preprocessing**: Transform input contributions and sequences into a unified format 2. **Hit Calling**: Identify motif instances using the Fi-NeMo algorithm 3. **Reporting and Analysis**: Generate visualizations and perform post-processing diff --git a/assets/methods.svg b/assets/methods.svg new file mode 100644 index 0000000..90ff4ea --- /dev/null +++ b/assets/methods.svg @@ -0,0 +1 @@ +Minimize DifferenceConvolveBackpropagateObserved scoresReconstructed scoresMotif CWMsHit coefficients (learned) \ No newline at end of file From 747908c281b40052cc2191e9bd3aaaaa53aa42bb Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 31 Aug 2025 13:52:32 -0700 Subject: [PATCH 16/39] Figure width --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c2c9bbc..17eea6c 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ The algorithm represents contribution scores as weighted combinations of motif c Fi-NeMo solves motif instance calling as an optimization problem that reconstructs contribution score tracks as sparse linear combinations of motif CWMs, formulated as an L1-regularized linear model. This competitive assignment encourages overlapping motif instances to be resolved in a meaningful way, with stronger matches receiving higher coefficients while weaker or redundant matches are suppressed. 
-![Methods diagram](/assets/methods.svg | width=100) +![Methods diagram](/assets/methods.svg) ## References From 92a67bf2ef86326372ca868cbb94f4dfa5a17e71 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 31 Aug 2025 13:53:40 -0700 Subject: [PATCH 17/39] Figure width --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 17eea6c..7ce7d1a 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ The algorithm represents contribution scores as weighted combinations of motif c Fi-NeMo solves motif instance calling as an optimization problem that reconstructs contribution score tracks as sparse linear combinations of motif CWMs, formulated as an L1-regularized linear model. This competitive assignment encourages overlapping motif instances to be resolved in a meaningful way, with stronger matches receiving higher coefficients while weaker or redundant matches are suppressed. -![Methods diagram](/assets/methods.svg) + ## References From 76cb5ab0c5d046adc35e4f1b67c32a7ea730e8f2 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 31 Aug 2025 13:54:11 -0700 Subject: [PATCH 18/39] Figure width --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7ce7d1a..81812ae 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ The algorithm represents contribution scores as weighted combinations of motif c Fi-NeMo solves motif instance calling as an optimization problem that reconstructs contribution score tracks as sparse linear combinations of motif CWMs, formulated as an L1-regularized linear model. This competitive assignment encourages overlapping motif instances to be resolved in a meaningful way, with stronger matches receiving higher coefficients while weaker or redundant matches are suppressed. 
- + ## References From b9a17c4fad3f47db08ffa4c472c5dfe2976fe432 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 31 Aug 2025 13:54:34 -0700 Subject: [PATCH 19/39] Figure width --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 81812ae..a127c91 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ The algorithm represents contribution scores as weighted combinations of motif c Fi-NeMo solves motif instance calling as an optimization problem that reconstructs contribution score tracks as sparse linear combinations of motif CWMs, formulated as an L1-regularized linear model. This competitive assignment encourages overlapping motif instances to be resolved in a meaningful way, with stronger matches receiving higher coefficients while weaker or redundant matches are suppressed. - + ## References From 516cfc7520b970db289cfc9a09b77fe3d1104777 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 31 Aug 2025 13:55:18 -0700 Subject: [PATCH 20/39] Figure width --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a127c91..ddf9cfc 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ The algorithm represents contribution scores as weighted combinations of motif c Fi-NeMo solves motif instance calling as an optimization problem that reconstructs contribution score tracks as sparse linear combinations of motif CWMs, formulated as an L1-regularized linear model. This competitive assignment encourages overlapping motif instances to be resolved in a meaningful way, with stronger matches receiving higher coefficients while weaker or redundant matches are suppressed. 
- +Methods diagram ## References From 9f6034167dec133de13f25dc70273345466fc87d Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 31 Aug 2025 13:55:51 -0700 Subject: [PATCH 21/39] Figure width --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ddf9cfc..196be85 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,9 @@ The algorithm represents contribution scores as weighted combinations of motif c Fi-NeMo solves motif instance calling as an optimization problem that reconstructs contribution score tracks as sparse linear combinations of motif CWMs, formulated as an L1-regularized linear model. This competitive assignment encourages overlapping motif instances to be resolved in a meaningful way, with stronger matches receiving higher coefficients while weaker or redundant matches are suppressed. -Methods diagram +
+ +
## References From fb58facd1880b1f5eccf4178ace437e8c6777333 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 1 Sep 2025 12:38:02 -0700 Subject: [PATCH 22/39] TF-MoDISco capitalization --- README.md | 12 ++++++------ src/finemo/__init__.py | 4 ++-- src/finemo/data_io.py | 24 ++++++++++++------------ src/finemo/evaluation.py | 20 ++++++++++---------- src/finemo/main.py | 14 +++++++------- src/finemo/templates/report.html | 10 +++++----- src/finemo/visualization.py | 10 +++++----- 7 files changed, 47 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 196be85..32affa9 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ The algorithm represents contribution scores as weighted combinations of motif c - **Competitive motif assignment**: Biologically-motivated algorithm that resolves similar motifs - **Context-aware analysis**: Leverages neural network importance scores for improved sensitivity and specificity - **Comprehensive evaluation**: Built-in tools for assessing and visualizing motif discovery quality and hit calling performance -- **Multiple input formats**: Support for bigWig, HDF5, and TF-MoDISCo output formats +- **Multiple input formats**: Support for bigWig, HDF5, and TF-MoDISco output formats ## Method @@ -30,7 +30,7 @@ Fi-NeMo is described in: > Tseng, Ramalingam, Wang, Schreiber, et al. "Decoding predictive motif lexicons and syntax from deep learning models of transcription factor binding profiles." 
(manuscript in preparation) Related tools: -- [TF-MoDISCo](https://github.com/jmschrei/tfmodisco-lite): *De novo* motif discovery from importance scores +- [TF-MoDISco](https://github.com/jmschrei/tfmodisco-lite): *De novo* motif discovery from importance scores - [BPNet](https://github.com/kundajelab/bpnet-refactor): Deep learning models for TF binding prediction - [ChromBPNet](https://github.com/kundajelab/chrombpnet): Deep learning models for chromatin accessibility prediction @@ -164,7 +164,7 @@ Usage: `finemo extract-regions-modisco-fmt -s -a -o < #### `finemo call-hits` -Identify motif instances in input regions using the Fi-NeMo competitive optimization algorithm. This is the core functionality that leverages TF-MoDISCo CWMs to find motif occurrences in contribution score data. +Identify motif instances in input regions using the Fi-NeMo competitive optimization algorithm. This is the core functionality that leverages TF-MoDISco CWMs to find motif occurrences in contribution score data. Usage: `finemo call-hits -r -m -o [-p ] [-t ] [-l ] [-b ] [-J]` @@ -257,14 +257,14 @@ Usage: `finemo call-hits -r -m -o [-p ] - **Scale Invariance**: Hit calling depends on motif and contribution score shapes, not absolute magnitudes. Use `hit_coefficient_global` or `hit_importance` for importance-based thresholding. - **Competitive Assignment**: Overlapping motif candidates compete; only the best-fitting motif at each position receives a non-zero coefficient. -- **Legacy Format Support**: Convert older TF-MoDISCo files using `modisco convert` from [tfmodisco-lite](https://github.com/jmschrei/tfmodisco-lite). +- **Legacy Format Support**: Convert older TF-MoDISco files using `modisco convert` from [tfmodisco-lite](https://github.com/jmschrei/tfmodisco-lite). ### Output reporting and post-processing #### `finemo report` -Generate an HTML report (`report.html`) visualizing TF-MoDISCo seqlet recall and hit distributions. 
-If `-n/--no-recall` is not set, the regions used for hit calling must exactly match those used during the TF-MoDISCo motif discovery process. +Generate an HTML report (`report.html`) visualizing TF-MoDISco seqlet recall and hit distributions. +If `-n/--no-recall` is not set, the regions used for hit calling must exactly match those used during the TF-MoDISco motif discovery process. This command does not utilize the GPU. Usage: `finemo report -r -H -o [-m ] [-W ] [-n]` diff --git a/src/finemo/__init__.py b/src/finemo/__init__.py index b170d01..1e891a4 100644 --- a/src/finemo/__init__.py +++ b/src/finemo/__init__.py @@ -11,7 +11,7 @@ Key Features ------------ - GPU-accelerated hit calling using PyTorch -- Support for multiple input formats (bigWig, HDF5, TF-MoDISCo) +- Support for multiple input formats (bigWig, HDF5, TF-MoDISco) - Competitive motif instance assignment - Comprehensive evaluation and visualization tools - Post-processing utilities for hit refinement @@ -58,6 +58,6 @@ See Also -------- -TF-MoDISCo : https://github.com/jmschrei/tfmodisco-lite +TF-MoDISco : https://github.com/jmschrei/tfmodisco-lite BPNet : https://github.com/kundajelab/bpnet-refactor """ diff --git a/src/finemo/data_io.py b/src/finemo/data_io.py index a604904..d266178 100644 --- a/src/finemo/data_io.py +++ b/src/finemo/data_io.py @@ -5,7 +5,7 @@ - Genome sequences (FASTA format) - Contribution scores (bigWig, HDF5 formats) - Neural network model outputs -- Motif data from TF-MoDISCo +- Motif data from TF-MoDISco - Hit calling results The module supports multiple input formats used for contribution scores @@ -454,7 +454,7 @@ def load_npy_or_npz(path: str) -> ndarray: def load_regions_from_modisco_fmt( shaps_paths: List[str], ohe_path: str, half_width: int ) -> Tuple[Int[ndarray, "N 4 L"], Float[ndarray, "N 4 L"]]: - """Load genomic sequences and contribution scores from TF-MoDISCo format files. + """Load genomic sequences and contribution scores from TF-MoDISco format files. 
Parameters ---------- @@ -708,7 +708,7 @@ def softmax(x: Float[ndarray, "4 W"], temp: float = 100) -> Float[ndarray, "4 W" def _motif_name_sort_key(data: Tuple[str, Any]) -> Union[Tuple[int], Tuple[int, str]]: - """Generate sort key for TF-MoDISCo motif names. + """Generate sort key for TF-MoDISco motif names. This function creates a sort key that orders motifs by pattern number, with non-standard patterns sorted to the end. @@ -753,16 +753,16 @@ def load_modisco_motifs( motif_lambda_default: float, include_rc: bool, ) -> Tuple[pl.DataFrame, Float[ndarray, "M 4 W"], Int[ndarray, "M W"], ndarray]: - """Load motif data from TF-MoDISCo HDF5 file with customizable processing options. + """Load motif data from TF-MoDISco HDF5 file with customizable processing options. This function extracts contribution weight matrices and associated metadata from - TF-MoDISCo results, with support for custom naming, trimming, and regularization + TF-MoDISco results, with support for custom naming, trimming, and regularization parameters. Parameters ---------- modisco_h5_path : str - Path to TF-MoDISCo HDF5 results file containing pattern groups. + Path to TF-MoDISco HDF5 results file containing pattern groups. trim_coords : Optional[Dict[str, Tuple[int, int]]] Manual trim coordinates for specific motifs {motif_name: (start, end)}. Takes precedence over automatic trimming based on thresholds. @@ -972,16 +972,16 @@ def load_modisco_seqlets( modisco_half_width: int, lazy: bool = False, ) -> Union[pl.DataFrame, pl.LazyFrame]: - """Load seqlet data from TF-MoDISCo HDF5 file and convert to genomic coordinates. + """Load seqlet data from TF-MoDISco HDF5 file and convert to genomic coordinates. - This function extracts seqlet instances from TF-MoDISCo results and converts + This function extracts seqlet instances from TF-MoDISco results and converts their relative positions to absolute genomic coordinates using peak region information. 
Parameters ---------- modisco_h5_path : str - Path to TF-MoDISCo HDF5 results file containing seqlet data. + Path to TF-MoDISco HDF5 results file containing seqlet data. peaks_df : pl.DataFrame DataFrame containing peak region information with columns: 'peak_id', 'chr', 'chr_id', 'peak_region_start'. @@ -991,7 +991,7 @@ def load_modisco_seqlets( half_width : int Half-width of the current analysis regions. modisco_half_width : int - Half-width of the regions used in the original TF-MoDISCo analysis. + Half-width of the regions used in the original TF-MoDISco analysis. Used to calculate coordinate offsets. lazy : bool, default False If True, returns a LazyFrame for efficient chaining of operations. @@ -1019,7 +1019,7 @@ def load_modisco_seqlets( motif name, and reverse complement status to avoid redundant instances. The coordinate transformation accounts for differences in region sizes - between the original TF-MoDISCo analysis and the current analysis. + between the original TF-MoDISco analysis and the current analysis. """ start_lst = [] @@ -1107,7 +1107,7 @@ def get_pattern_number(x): def write_modisco_seqlets( seqlets_df: Union[pl.DataFrame, pl.LazyFrame], out_path: str ) -> None: - """Write TF-MoDISCo seqlets to TSV file. + """Write TF-MoDISco seqlets to TSV file. 
Parameters ---------- diff --git a/src/finemo/evaluation.py b/src/finemo/evaluation.py index 61a89b2..cf74375 100644 --- a/src/finemo/evaluation.py +++ b/src/finemo/evaluation.py @@ -2,7 +2,7 @@ This module provides functions for: - Computing motif occurrence statistics and co-occurrence patterns -- Evaluating motif discovery quality against TF-MoDISCo results +- Evaluating motif discovery quality against TF-MoDISco results - Analyzing hit calling performance and recall metrics - Generating confusion matrices for seqlet-hit comparisons """ @@ -176,10 +176,10 @@ def tfmodisco_comparison( Dict[str, Dict[str, Float[ndarray, "4 W"]]], Dict[str, Dict[str, Tuple[int, int]]], ]: - """Compare Fi-NeMo hits with TF-MoDISCo seqlets and compute evaluation metrics. + """Compare Fi-NeMo hits with TF-MoDISco seqlets and compute evaluation metrics. This function performs comprehensive comparison between Fi-NeMo hit calls - and TF-MoDISCo seqlets, computing recall metrics, CWM similarities, + and TF-MoDISco seqlets, computing recall metrics, CWM similarities, and extracting contribution weight matrices for visualization. Parameters @@ -194,14 +194,14 @@ def tfmodisco_comparison( Peak metadata with columns: - peak_id, chr_id, peak_region_start seqlets_df : Optional[pl.DataFrame] - TF-MoDISCo seqlets with columns: + TF-MoDISco seqlets with columns: - chr_id, start_untrimmed, is_revcomp, motif_name If None, only basic hit statistics are computed. motifs_df : pl.DataFrame Motif metadata with columns: - motif_name, strand, motif_id, motif_start, motif_end cwms_modisco : Float[ndarray, "M 4 W"] - TF-MoDISCo contribution weight matrices. + TF-MoDISco contribution weight matrices. Shape: (n_modisco_motifs, 4, motif_width) motif_names : List[str] Names of motifs to analyze. 
@@ -223,7 +223,7 @@ def tfmodisco_comparison( cwms : Dict[str, Dict[str, Float[ndarray, "4 W"]]] Extracted CWMs for each motif and condition: - hits_fc, hits_rc: Forward/reverse complement hits - - modisco_fc, modisco_rc: TF-MoDISCo forward/reverse + - modisco_fc, modisco_rc: TF-MoDISco forward/reverse - seqlets_only, hits_restricted_only: Non-overlapping instances cwm_trim_bounds : Dict[str, Dict[str, Tuple[int, int]]] Trimming boundaries for each CWM type and motif. @@ -231,7 +231,7 @@ def tfmodisco_comparison( Notes ----- - Hits are filtered to central region defined by modisco_half_width - - CWM similarity is computed as normalized dot product between hit and TF-MoDISCo CWMs + - CWM similarity is computed as normalized dot product between hit and TF-MoDISco CWMs - Recall metrics require both hits_df and seqlets_df to be non-empty - Missing motifs are handled gracefully with empty DataFrames @@ -427,10 +427,10 @@ def seqlet_confusion( motif_names: List[str], motif_width: int, ) -> Tuple[pl.DataFrame, Float[ndarray, "M M"]]: - """Compute confusion matrix between TF-MoDISCo seqlets and Fi-NeMo hits. + """Compute confusion matrix between TF-MoDISco seqlets and Fi-NeMo hits. This function creates a confusion matrix showing the overlap between - TF-MoDISCo seqlets (ground truth) and Fi-NeMo hits across different motifs. + TF-MoDISco seqlets (ground truth) and Fi-NeMo hits across different motifs. Overlap frequencies are estimated using binned genomic coordinates. 
Parameters @@ -439,7 +439,7 @@ def seqlet_confusion( Fi-NeMo hit calls with required columns: - peak_id, start_untrimmed, end_untrimmed, strand, motif_name seqlets_df : pl.DataFrame - TF-MoDISCo seqlets with required columns: + TF-MoDISco seqlets with required columns: - chr_id, start_untrimmed, end_untrimmed, motif_name peaks_df : pl.DataFrame Peak metadata for joining coordinates: diff --git a/src/finemo/main.py b/src/finemo/main.py index 16d03af..9ab84f4 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -7,7 +7,7 @@ - Post-processing operations (hit collapsing, intersection) The CLI supports multiple input formats including bigWig, HDF5 (ChromBPNet/BPNet), -and TF-MoDISCo format. +and TF-MoDISco format. """ from . import data_io @@ -137,7 +137,7 @@ def extract_regions_modisco_fmt( out_path: str, region_width: int, ) -> None: - """Extract genomic regions and contribution scores from TF-MoDISCo format files. + """Extract genomic regions and contribution scores from TF-MoDISco format files. Parameters ---------- @@ -407,7 +407,7 @@ def report( """Generate comprehensive HTML report with statistics and visualizations. This function creates detailed analysis reports comparing Fi-NeMo hit calling - results with TF-MoDISCo seqlets, including performance metrics, distribution + results with TF-MoDISco seqlets, including performance metrics, distribution plots, and motif visualization. The report provides insights into hit calling quality and motif discovery accuracy. @@ -418,7 +418,7 @@ def report( hits_dir : str Path to directory containing Fi-NeMo hit calling outputs. modisco_h5_path : str, optional - Path to TF-MoDISCo H5 file. If None, seqlet comparisons are skipped. + Path to TF-MoDISco H5 file. If None, seqlet comparisons are skipped. peaks_path : str, optional DEPRECATED. Peak coordinates should be included in regions file. motifs_include_path : str, optional @@ -428,11 +428,11 @@ def report( out_dir : str Output directory for report files. 
modisco_region_width : int - Width of regions used by TF-MoDISCo (needed for coordinate conversion). + Width of regions used by TF-MoDISco (needed for coordinate conversion). cwm_trim_threshold : float DEPRECATED. This information is inferred from hit calling outputs. compute_recall : bool - Whether to compute recall metrics against TF-MoDISCo seqlets. + Whether to compute recall metrics against TF-MoDISco seqlets. use_seqlets : bool Whether to include seqlet-based comparisons in the report. @@ -674,7 +674,7 @@ def cli() -> None: """Command-line interface for the Fi-NeMo motif instance calling pipeline. This function provides the main entry point for all Fi-NeMo operations including: - - Data preprocessing from various formats (bigWig, HDF5, TF-MoDISCo) + - Data preprocessing from various formats (bigWig, HDF5, TF-MoDISco) - Motif hit calling using the Fi-NeMo algorithm - Report generation and visualization - Post-processing operations (hit collapsing, intersection) diff --git a/src/finemo/templates/report.html b/src/finemo/templates/report.html index a04417b..a2389d1 100644 --- a/src/finemo/templates/report.html +++ b/src/finemo/templates/report.html @@ -185,7 +185,7 @@

Fi-NeMo Motif Hit Calling Report

a GPU-accelerated method for identifying transcription factor binding sites using neural network contribution scores. Fi-NeMo uses a competitive optimization approach to comprehensively map motif instances by solving a sparse linear reconstruction problem. The report compares Fi-NeMo hits - with TF-MoDISCo seqlets (when available) and provides detailed statistics on hit quality and + with TF-MoDISco seqlets (when available) and provides detailed statistics on hit quality and motif discovery performance.

@@ -207,10 +207,10 @@

TF-MoDISco seqlet comparisons

Hit vs. seqlet counts

- This scatter plot compares the number of motif instances called by Fi-NeMo versus the number of TF-MoDISCo seqlets + This scatter plot compares the number of motif instances called by Fi-NeMo versus the number of TF-MoDISco seqlets identified for each motif. The dashed line represents perfect agreement (y = x). Fi-NeMo typically identifies - an order of magnitude more motif instances than TF-MoDISCo because: (1) TF-MoDISCo applies stringent filtering criteria - during seqlet identification, and (2) TF-MoDISCo often analyzes smaller genomic windows than those used for hit calling. + an order of magnitude more motif instances than TF-MoDISco because: (1) TF-MoDISco applies stringent filtering criteria + during seqlet identification, and (2) TF-MoDISco often analyzes smaller genomic windows than those used for hit calling.

{% endif %} @@ -218,7 +218,7 @@

Hit vs. seqlet counts

Motif-specific hit and seqlet analysis

This table provides detailed statistics for each motif, comparing the consistency between Fi-NeMo hits - and TF-MoDISCo seqlets. The analysis includes hit counts, overlap statistics, and visual comparisons + and TF-MoDISco seqlets. The analysis includes hit counts, overlap statistics, and visual comparisons of contribution weight matrices (CWMs).

diff --git a/src/finemo/visualization.py b/src/finemo/visualization.py index 6346857..74e4aa2 100644 --- a/src/finemo/visualization.py +++ b/src/finemo/visualization.py @@ -28,9 +28,9 @@ def abbreviate_motif_name(name: str) -> str: - """Convert TF-MoDISCo motif names to abbreviated format. + """Convert TF-MoDISco motif names to abbreviated format. - Converts full TF-MoDISCo pattern names to shorter, more readable format + Converts full TF-MoDISco pattern names to shorter, more readable format for display in plots and reports. Parameters @@ -277,7 +277,7 @@ def plot_seqlet_confusion_heatmap( ) -> None: """Plot confusion matrix heatmap comparing seqlets to hit calls. - Creates a heatmap showing the overlap between TF-MoDISCo seqlets and + Creates a heatmap showing the overlap between TF-MoDISco seqlets and Fi-NeMo hit calls. Rows represent seqlet motifs, columns represent hit motifs. Parameters @@ -578,7 +578,7 @@ def plot_hit_vs_seqlet_counts( """Plot scatter plot comparing hit counts to seqlet counts per motif. Creates a log-log scatter plot showing the relationship between the number - of hits called by Fi-NeMo and the number of seqlets identified by TF-MoDISCo + of hits called by Fi-NeMo and the number of seqlets identified by TF-MoDISco for each motif. Includes diagonal reference line and motif annotations. Parameters @@ -660,7 +660,7 @@ def write_report( Whether recall metrics were computed and should be included in the report template. use_seqlets : bool - Whether TF-MoDISCo seqlet data was used in the analysis + Whether TF-MoDISco seqlet data was used in the analysis and should be referenced in the report. 
Notes From 03fb6aa8f566348ccd4fd5c92611d3097376ea68 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 1 Sep 2025 12:40:04 -0700 Subject: [PATCH 23/39] Update license --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index dd86235..ebb7494 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 Austin Wang +Copyright (c) 2025 Austin Wang Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From abf970fe91ffa80586b34b152dc7f98d870dec1b Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 1 Sep 2025 12:54:02 -0700 Subject: [PATCH 24/39] API docs --- .github/workflows/docs.yml | 33 +++++++++++++++++++++++++++++++++ .gitignore | 1 + 2 files changed, 34 insertions(+) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..a5346ae --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,33 @@ +name: Generate API Documentation + +on: + push: + branches: [ dev ] + workflow_dispatch: + +jobs: + deploy-docs: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + pip install pdoc + pip install -e . 
+ + - name: Generate documentation + run: | + pdoc --html --output-dir ./docs --force finemo + + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./docs/finemo \ No newline at end of file diff --git a/.gitignore b/.gitignore index 78dbf8b..5e3aa36 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.egg-info __pycache__ /.* +!/.github /notebooks /scratch.txt /scratch \ No newline at end of file From 4c2f2faea90a6c28201698b62384944a92dc2f7d Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 1 Sep 2025 12:58:15 -0700 Subject: [PATCH 25/39] API docs --- .github/workflows/docs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index a5346ae..8499f19 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -24,10 +24,10 @@ jobs: - name: Generate documentation run: | - pdoc --html --output-dir ./docs --force finemo + pdoc -d numpy --output-dir ./docs finemo - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./docs/finemo \ No newline at end of file + publish_dir: ./docs \ No newline at end of file From b4998f9d8639379a8572cb8aa9da98355515e554 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 1 Sep 2025 13:01:53 -0700 Subject: [PATCH 26/39] Fix missing dependencies --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 747bb6a..2344165 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,8 @@ dependencies = [ "tqdm", "pyBigWig", "pyfaidx", - "jinja2" + "jinja2", + "jaxtyping" ] [project.scripts] From 1736b28674a1a141076007eb5cbf5895e13dd66b Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 1 Sep 2025 13:22:59 -0700 Subject: [PATCH 27/39] Module-based entry point --- src/finemo/__main__.py | 8 ++++++++ 1 file changed, 8 
insertions(+) create mode 100644 src/finemo/__main__.py diff --git a/src/finemo/__main__.py b/src/finemo/__main__.py new file mode 100644 index 0000000..4a6bf2e --- /dev/null +++ b/src/finemo/__main__.py @@ -0,0 +1,8 @@ +""" +Entry point for running finemo's CLI as a module via 'python -m finemo'. +""" + +from .main import cli + +if __name__ == "__main__": + cli() From 6e065fd4aa6375791c2f59d165363db8b291b142 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 1 Sep 2025 13:45:04 -0700 Subject: [PATCH 28/39] Move defaults to functions --- src/finemo/__init__.py | 13 ++-- src/finemo/hitcaller.py | 38 +++++------ src/finemo/main.py | 145 ++++++++++++++++++++++------------------ 3 files changed, 105 insertions(+), 91 deletions(-) diff --git a/src/finemo/__init__.py b/src/finemo/__init__.py index 1e891a4..d3bb472 100644 --- a/src/finemo/__init__.py +++ b/src/finemo/__init__.py @@ -18,12 +18,12 @@ Modules ------- -hitcaller : Core Fi-NeMo algorithm implementation -data_io : Data input/output utilities -main : Command-line interface -evaluation : Performance assessment tools -visualization : Plotting and report generation -postprocessing : Hit refinement and analysis +- hitcaller : Core Fi-NeMo algorithm implementation +- data_io : Data input/output utilities +- main : Command-line interface +- evaluation : Performance assessment tools +- visualization : Plotting and report generation +- postprocessing : Hit refinement and analysis Examples -------- @@ -60,4 +60,5 @@ -------- TF-MoDISco : https://github.com/jmschrei/tfmodisco-lite BPNet : https://github.com/kundajelab/bpnet-refactor +ChromBPNet: https://github.com/kundajelab/chrombpnet """ diff --git a/src/finemo/hitcaller.py b/src/finemo/hitcaller.py index b95c360..a968b44 100644 --- a/src/finemo/hitcaller.py +++ b/src/finemo/hitcaller.py @@ -418,16 +418,16 @@ def fit_contribs( cwm_trim_mask: Float[ndarray, "M W"], use_hypothetical: bool, lambdas: Float[ndarray, " M"], - step_size_max: float, - step_size_min: float, - 
sqrt_transform: bool, - convergence_tol: float, - max_steps: int, - batch_size: int, - step_adjust: float, - post_filter: bool, - device: Optional[torch.device], - compile_optimizer: bool, + step_size_max: float = 3.0, + step_size_min: float = 0.08, + sqrt_transform: bool = False, + convergence_tol: float = 0.0005, + max_steps: int = 10000, + batch_size: int = 2000, + step_adjust: float = 0.7, + post_filter: bool = True, + device: Optional[torch.device] = None, + compile_optimizer: bool = False, eps: float = 1.0, ) -> Tuple[pl.DataFrame, pl.DataFrame]: """Call motif hits by fitting sparse linear model to contribution scores. @@ -454,25 +454,25 @@ def fit_contribs( projected scores (False). lambdas : Float[ndarray, "M"] L1 regularization weights for each motif. - step_size_max : float + step_size_max : float, default 3.0 Maximum optimization step size. - step_size_min : float + step_size_min : float, default 0.08 Minimum optimization step size (for convergence failure detection). - sqrt_transform : bool + sqrt_transform : bool, default False Whether to apply signed square root transformation to inputs. - convergence_tol : float + convergence_tol : float, default 0.0005 Convergence tolerance based on duality gap. - max_steps : int + max_steps : int, default 10000 Maximum number of optimization steps. - batch_size : int + batch_size : int, default 2000 Number of regions to process simultaneously. - step_adjust : float + step_adjust : float, default 0.7 Factor to reduce step size when optimization diverges. - post_filter : bool + post_filter : bool, default True Whether to filter hits based on similarity threshold. device : torch.device, optional Target device for computation. Auto-detected if None. - compile_optimizer : bool + compile_optimizer : bool, default False Whether to JIT compile the optimizer for speed. eps : float, default 1.0 Small constant for numerical stability. 
diff --git a/src/finemo/main.py b/src/finemo/main.py index 9ab84f4..fe8623f 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -15,6 +15,7 @@ import os import argparse import warnings +import inspect from typing import Optional, List import polars as pl @@ -26,7 +27,7 @@ def extract_regions_bw( fa_path: str, bw_paths: List[str], out_path: str, - region_width: int, + region_width: int = 1000, ) -> None: """Extract genomic regions and contribution scores from bigWig and FASTA files. @@ -42,7 +43,7 @@ def extract_regions_bw( List of bigWig file paths containing contribution scores. out_path : str Output path for NPZ file. - region_width : int + region_width : int, default 1000 Width of regions to extract around peak summits. Notes @@ -66,7 +67,7 @@ def extract_regions_chrombpnet_h5( chrom_order_path: Optional[str], h5_paths: List[str], out_path: str, - region_width: int, + region_width: int = 1000, ) -> None: """Extract genomic regions and contribution scores from ChromBPNet HDF5 files. @@ -80,7 +81,7 @@ def extract_regions_chrombpnet_h5( List of ChromBPNet HDF5 file paths. out_path : str Output path for NPZ file. - region_width : int + region_width : int, default 1000 Width of regions to extract around peak summits. """ half_width = region_width // 2 @@ -100,7 +101,7 @@ def extract_regions_bpnet_h5( chrom_order_path: Optional[str], h5_paths: List[str], out_path: str, - region_width: int, + region_width: int = 1000, ) -> None: """Extract genomic regions and contribution scores from BPNet HDF5 files. @@ -114,7 +115,7 @@ def extract_regions_bpnet_h5( List of BPNet HDF5 file paths. out_path : str Output path for NPZ file. - region_width : int + region_width : int, default 1000 Width of regions to extract around peak summits. 
""" half_width = region_width // 2 @@ -135,7 +136,7 @@ def extract_regions_modisco_fmt( shaps_paths: List[str], ohe_path: str, out_path: str, - region_width: int, + region_width: int = 1000, ) -> None: """Extract genomic regions and contribution scores from TF-MoDISco format files. @@ -151,7 +152,7 @@ def extract_regions_modisco_fmt( Path to .npy/.npz file containing one-hot encoded sequences. out_path : str Output path for NPZ file. - region_width : int + region_width : int, default 1000 Width of regions to extract around peak summits. """ half_width = region_width // 2 @@ -177,21 +178,21 @@ def call_hits( motif_names_path: Optional[str], motif_lambdas_path: Optional[str], out_dir: str, - cwm_trim_coords_path: Optional[str], - cwm_trim_thresholds_path: Optional[str], - cwm_trim_threshold_default: float, - lambda_default: float, - step_size_max: float, - step_size_min: float, - sqrt_transform: bool, - convergence_tol: float, - max_steps: int, - batch_size: int, - step_adjust: float, - device: Optional[str], - mode: str, - no_post_filter: bool, - compile_optimizer: bool, + cwm_trim_coords_path: Optional[str] = None, + cwm_trim_thresholds_path: Optional[str] = None, + cwm_trim_threshold_default: float = 0.3, + lambda_default: float = 0.7, + step_size_max: float = 3.0, + step_size_min: float = 0.08, + sqrt_transform: bool = False, + convergence_tol: float = 0.0005, + max_steps: int = 10000, + batch_size: int = 2000, + step_adjust: float = 0.7, + device: Optional[str] = None, + mode: str = "pp", + no_post_filter: bool = False, + compile_optimizer: bool = False, ) -> None: """Call motif hits using the Fi-NeMo algorithm on preprocessed genomic regions. @@ -224,31 +225,31 @@ def call_hits( Path to file specifying custom motif trimming coordinates. cwm_trim_thresholds_path : str, optional Path to file specifying custom motif trimming thresholds. - cwm_trim_threshold_default : float - Default threshold for motif trimming (typically 0.3). 
- lambda_default : float - Default L1 regularization weight (typically 0.7). - step_size_max : float + cwm_trim_threshold_default : float, default 0.3 + Default threshold for motif trimming. + lambda_default : float, default 0.7 + Default L1 regularization weight. + step_size_max : float, default 3.0 Maximum optimization step size. - step_size_min : float + step_size_min : float, default 0.08 Minimum optimization step size. - sqrt_transform : bool + sqrt_transform : bool, default False Whether to apply signed square root transform to contributions. - convergence_tol : float + convergence_tol : float, default 0.0005 Convergence tolerance for duality gap. - max_steps : int + max_steps : int, default 10000 Maximum number of optimization steps. - batch_size : int + batch_size : int, default 2000 Batch size for GPU processing. - step_adjust : float + step_adjust : float, default 0.7 Step size adjustment factor on divergence. device : str, optional DEPRECATED. Use CUDA_VISIBLE_DEVICES environment variable instead. - mode : str + mode : str, default "pp" Contribution type mode ('pp', 'ph', 'hp', 'hh') where 'p'=projected, 'h'=hypothetical. - no_post_filter : bool + no_post_filter : bool, default False If True, skip post-hit-calling similarity filtering. - compile_optimizer : bool + compile_optimizer : bool, default False Whether to JIT-compile the optimizer for speed. Notes @@ -399,10 +400,10 @@ def report( motifs_include_path: Optional[str], motif_names_path: Optional[str], out_dir: str, - modisco_region_width: int, - cwm_trim_threshold: float, - compute_recall: bool, - use_seqlets: bool, + modisco_region_width: int = 400, + cwm_trim_threshold: float = 0.3, + compute_recall: bool = True, + use_seqlets: bool = True, ) -> None: """Generate comprehensive HTML report with statistics and visualizations. @@ -427,13 +428,13 @@ def report( DEPRECATED. This information is inferred from hit calling outputs. out_dir : str Output directory for report files. 
- modisco_region_width : int + modisco_region_width : int, default 400 Width of regions used by TF-MoDISco (needed for coordinate conversion). - cwm_trim_threshold : float + cwm_trim_threshold : float, default 0.3 DEPRECATED. This information is inferred from hit calling outputs. - compute_recall : bool + compute_recall : bool, default True Whether to compute recall metrics against TF-MoDISco seqlets. - use_seqlets : bool + use_seqlets : bool, default True Whether to include seqlet-based comparisons in the report. Notes @@ -603,7 +604,7 @@ def report( ) -def collapse_hits(hits_path: str, out_path: str, overlap_frac: float) -> None: +def collapse_hits(hits_path: str, out_path: str, overlap_frac: float = 0.2) -> None: """Collapse overlapping hits by selecting the best hit per overlapping group. This function processes a set of motif hits and identifies overlapping hits, @@ -617,7 +618,7 @@ def collapse_hits(hits_path: str, out_path: str, overlap_frac: float) -> None: Path to input TSV file containing hit data (hits.tsv or hits_unique.tsv). out_path : str Path to output TSV file with additional 'is_primary' column. - overlap_frac : float + overlap_frac : float, default 0.2 Minimum fractional overlap for considering hits as overlapping. For hits of lengths x and y, minimum overlap = overlap_frac * (x + y) / 2. @@ -637,7 +638,7 @@ def collapse_hits(hits_path: str, out_path: str, overlap_frac: float) -> None: ) -def intersect_hits(hits_paths: List[str], out_path: str, relaxed: bool) -> None: +def intersect_hits(hits_paths: List[str], out_path: str, relaxed: bool = False) -> None: """Find intersection of hits across multiple Fi-NeMo runs. This function identifies motif hits that are consistently called across @@ -651,7 +652,7 @@ def intersect_hits(hits_paths: List[str], out_path: str, relaxed: bool) -> None: out_path : str Path to output TSV file containing intersection results. Duplicate columns are suffixed with run index. 
- relaxed : bool + relaxed : bool, default False If True, uses relaxed intersection criteria based only on motif names and untrimmed coordinates. If False, assumes consistent region definitions and motif trimming across runs. @@ -733,7 +734,9 @@ def cli() -> None: "-w", "--region-width", type=int, - default=1000, + default=inspect.signature(extract_regions_bw) + .parameters["region_width"] + .default, help="The width of the input region centered around each peak summit.", ) @@ -779,7 +782,9 @@ def cli() -> None: "-w", "--region-width", type=int, - default=1000, + default=inspect.signature(extract_regions_chrombpnet_h5) + .parameters["region_width"] + .default, help="The width of the input region centered around each peak summit.", ) @@ -825,7 +830,9 @@ def cli() -> None: "-w", "--region-width", type=int, - default=1000, + default=inspect.signature(extract_regions_chrombpnet_h5) + .parameters["region_width"] + .default, help="The width of the input region centered around each peak summit.", ) @@ -871,7 +878,9 @@ def cli() -> None: "-w", "--region-width", type=int, - default=1000, + default=inspect.signature(extract_regions_bpnet_h5) + .parameters["region_width"] + .default, help="The width of the input region centered around each peak summit.", ) @@ -925,7 +934,9 @@ def cli() -> None: "-w", "--region-width", type=int, - default=1000, + default=inspect.signature(extract_regions_modisco_fmt) + .parameters["region_width"] + .default, help="The width of the input region centered around each peak summit.", ) @@ -939,7 +950,7 @@ def cli() -> None: "-M", "--mode", type=str, - default="pp", + default=inspect.signature(call_hits).parameters["mode"].default, choices={"pp", "ph", "hp", "hh"}, help="The type of attributions to use for CWM's and input contribution scores, respectively. 
'h' for hypothetical and 'p' for projected.", ) @@ -1001,7 +1012,9 @@ def cli() -> None: "-t", "--cwm-trim-threshold", type=float, - default=0.3, + default=inspect.signature(call_hits) + .parameters["cwm_trim_threshold_default"] + .default, help="The default threshold to determine motif start and end positions within the full CWMs.", ) call_hits_parser.add_argument( @@ -1023,7 +1036,7 @@ def cli() -> None: "-l", "--global-lambda", type=float, - default=0.7, + default=inspect.signature(call_hits).parameters["lambda_default"].default, help="The default L1 regularization weight determining the sparsity of hits.", ) call_hits_parser.add_argument( @@ -1064,42 +1077,42 @@ def cli() -> None: "-s", "--step-size-max", type=float, - default=3.0, + default=inspect.signature(call_hits).parameters["step_size_max"].default, help="The maximum optimizer step size.", ) call_hits_parser.add_argument( "-i", "--step-size-min", type=float, - default=0.08, + default=inspect.signature(call_hits).parameters["step_size_min"].default, help="The minimum optimizer step size.", ) call_hits_parser.add_argument( "-j", "--step-adjust", type=float, - default=0.7, + default=inspect.signature(call_hits).parameters["step_adjust"].default, help="The optimizer step size adjustment factor. If the optimizer diverges, the step size is multiplicatively adjusted by this factor", ) call_hits_parser.add_argument( "-c", "--convergence-tol", type=float, - default=0.0005, + default=inspect.signature(call_hits).parameters["convergence_tol"].default, help="The tolerance for determining convergence. 
The optimizer exits when the duality gap is less than the tolerance.", ) call_hits_parser.add_argument( "-S", "--max-steps", type=int, - default=10000, + default=inspect.signature(call_hits).parameters["max_steps"].default, help="The maximum number of optimization steps.", ) call_hits_parser.add_argument( "-b", "--batch-size", type=int, - default=2000, + default=inspect.signature(call_hits).parameters["batch_size"].default, help="The batch size used for optimization.", ) call_hits_parser.add_argument( @@ -1177,14 +1190,14 @@ def cli() -> None: "-W", "--modisco-region-width", type=int, - default=400, + default=inspect.signature(report).parameters["modisco_region_width"].default, help="The width of the region around each peak summit used by tfmodisco-lite.", ) report_parser.add_argument( "-t", "--cwm-trim-threshold", type=float, - default=0.3, + default=inspect.signature(report).parameters["cwm_trim_threshold"].default, help="DEPRECATED: This information is now inferred from the outputs of `finemo call-hits`.", ) report_parser.add_argument( @@ -1224,7 +1237,7 @@ def cli() -> None: "-O", "--overlap-frac", type=float, - default=0.2, + default=inspect.signature(collapse_hits).parameters["overlap_frac"].default, help="The threshold for determining overlapping hits. For two hits with lengths x and y, the minimum overlap is defined as `overlap_frac * (x + y) / 2`. 
The default value of 0.2 means that two hits must overlap by at least 20% of their average lengths to be considered overlapping.", ) From 5a1489c69ee8690466446f61dc261a37ebc9e1c1 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 1 Sep 2025 13:50:31 -0700 Subject: [PATCH 29/39] Link to API docs --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 32affa9..0b7ce64 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,11 @@ Recommended: - Peak region coordinates in uncompressed [ENCODE NarrowPeak](https://genome.ucsc.edu/FAQ/FAQformat.html#format12) format. -## Usage +## API Documentation + +For detailed Python API documentation, see: https://www.austintwang.com/finemo_gpu/finemo.html + +## Command-Line Usage Fi-NeMo provides a command-line utility named `finemo` for motif instance calling and analysis. The typical workflow involves three main steps: From 36ed9ea27231dd2b147f3529faac4f4432989ab2 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 1 Sep 2025 14:04:07 -0700 Subject: [PATCH 30/39] Define `__all__` --- README.md | 2 +- src/finemo/__init__.py | 16 ++++++++++++++++ src/finemo/main.py | 3 --- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0b7ce64..e64a859 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ Recommended: ## API Documentation -For detailed Python API documentation, see: https://www.austintwang.com/finemo_gpu/finemo.html +For Fi-NeMo's Python API documentation, see: https://www.austintwang.com/finemo_gpu/finemo.html ## Command-Line Usage diff --git a/src/finemo/__init__.py b/src/finemo/__init__.py index d3bb472..dc14e91 100644 --- a/src/finemo/__init__.py +++ b/src/finemo/__init__.py @@ -62,3 +62,19 @@ BPNet : https://github.com/kundajelab/bpnet-refactor ChromBPNet: https://github.com/kundajelab/chrombpnet """ + +from . import data_io +from . import hitcaller +from . import evaluation +from . import visualization +from . 
import postprocessing +from . import main + +__all__ = [ + "data_io", + "hitcaller", + "evaluation", + "visualization", + "postprocessing", + "main", +] diff --git a/src/finemo/main.py b/src/finemo/main.py index fe8623f..3ffb2af 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -679,9 +679,6 @@ def cli() -> None: - Motif hit calling using the Fi-NeMo algorithm - Report generation and visualization - Post-processing operations (hit collapsing, intersection) - - The CLI supports comprehensive workflows for transcription factor motif - analysis from raw genomic data to publication-ready visualizations. """ parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(required=True, dest="cmd") From 6f130307a9dff03c5620c231b82e2d461bb3d528 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 1 Sep 2025 14:22:57 -0700 Subject: [PATCH 31/39] README tweaks --- README.md | 5 ++--- src/finemo/__init__.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e64a859..1b1ba64 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Fi-NeMo: Finding Neural network Motifs +# Fi-NeMo: Finding Neural Network Motifs -**Fi-NeMo** (**Fi**nding **Ne**ural network **Mo**tifs) is a GPU-accelerated motif instance calling tool for identifying transcription factor binding sites from neural network contribution scores. +**Fi-NeMo** (**Fi**nding **Ne**ural Network **Mo**tifs) is a GPU-accelerated motif instance calling tool for identifying transcription factor binding sites from neural network contribution scores. ## Overview @@ -260,7 +260,6 @@ Usage: `finemo call-hits -r -m -o [-p ] #### Important Notes - **Scale Invariance**: Hit calling depends on motif and contribution score shapes, not absolute magnitudes. Use `hit_coefficient_global` or `hit_importance` for importance-based thresholding. 
-- **Competitive Assignment**: Overlapping motif candidates compete; only the best-fitting motif at each position receives a non-zero coefficient. - **Legacy Format Support**: Convert older TF-MoDISco files using `modisco convert` from [tfmodisco-lite](https://github.com/jmschrei/tfmodisco-lite). ### Output reporting and post-processing diff --git a/src/finemo/__init__.py b/src/finemo/__init__.py index dc14e91..6b18638 100644 --- a/src/finemo/__init__.py +++ b/src/finemo/__init__.py @@ -1,4 +1,4 @@ -"""Fi-NeMo: Finding Neural network Motifs. +"""Fi-NeMo: Finding Neural Network Motifs. A GPU-accelerated motif instance calling tool for identifying transcription factor binding sites from neural network contribution scores. From fb962d4d0c4027d43d0d1c0e6b22052ced6fc5c8 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Mon, 1 Sep 2025 18:45:12 -0700 Subject: [PATCH 32/39] Make dimension names consistent --- src/finemo/hitcaller.py | 178 +++++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 86 deletions(-) diff --git a/src/finemo/hitcaller.py b/src/finemo/hitcaller.py index a968b44..27cea8c 100644 --- a/src/finemo/hitcaller.py +++ b/src/finemo/hitcaller.py @@ -24,9 +24,6 @@ from tqdm import tqdm -# Type aliases for tensor operations -ArrayLike = Union[ndarray, torch.Tensor] - def prox_grad_step( coefficients: Float[Tensor, "B M P"], @@ -44,8 +41,12 @@ def prox_grad_step( The goal is to represent contribution scores as a sparse linear combination of motif contribution weight matrices (CWMs). - B = batch size, M = number of motifs, L = sequence length, W = motif width. - P = L - W + 1 (the number of positions with coefficients). 
+ Dimension notation: + - B = batch size (number of regions processed simultaneously) + - M = number of motifs + - L = sequence length + - W = motif width (length of each motif) + - P = L - W + 1 (number of valid motif positions) Parameters ---------- @@ -67,12 +68,12 @@ def prox_grad_step( Returns ------- - c_next : Float[Tensor, "B M P"], shape (b, m, l - w + 1) - Updated coefficient matrix after the optimization step. - dual_gap : Float[Tensor, "B"] - Duality gap for convergence assessment. - nll : Float[Tensor, "B"] - Negative log likelihood (proportional to MSE). + c_next : Float[Tensor, "B M P"] + Updated coefficient matrix after the optimization step (shape: batch_size × motifs × positions). + dual_gap : Float[Tensor, " B"] + Duality gap for convergence assessment (shape: batch_size). + nll : Float[Tensor, " B"] + Negative log likelihood (proportional to MSE, shape: batch_size). Notes ----- @@ -89,31 +90,31 @@ def prox_grad_step( """ # Forward pass: convolution operations require specific tensor layouts coef_adj = coefficients * importance_scale - pred_unmasked = F.conv_transpose1d(coef_adj, cwms) # (b, 4, l) + pred_unmasked = F.conv_transpose1d(coef_adj, cwms) # (B, 4, L) pred = ( pred_unmasked * sequences - ) # (b, 4, l), element-wise masking for projected mode + ) # (B, 4, L), element-wise masking for projected mode # Compute gradient * -1 - residuals = contribs - pred # (b, 4, l) - ngrad = F.conv1d(residuals, cwms) * importance_scale # (b, m, l - w + 1) + residuals = contribs - pred # (B, 4, L) + ngrad = F.conv1d(residuals, cwms) * importance_scale # (B, M, P) # Negative log likelihood (proportional to MSE) - nll = (residuals**2).sum(dim=(1, 2)) # (b) + nll = (residuals**2).sum(dim=(1, 2)) # (B) # Compute duality gap for convergence assessment - dual_norm = (ngrad / lambdas).amax(dim=(1, 2)) # (b) - dual_scale = (torch.clamp(1 / dual_norm, max=1.0) ** 2 + 1) / 2 # (b) - nll_scaled = nll * dual_scale # (b) + dual_norm = (ngrad / lambdas).amax(dim=(1, 2)) 
# (B) + dual_scale = (torch.clamp(1 / dual_norm, max=1.0) ** 2 + 1) / 2 # (B) + nll_scaled = nll * dual_scale # (B) - dual_diff = (residuals * contribs).sum(dim=(1, 2)) # (b) + dual_diff = (residuals * contribs).sum(dim=(1, 2)) # (B) l1_term = (torch.abs(coefficients).sum(dim=2, keepdim=True) * lambdas).sum( dim=(1, 2) - ) # (b) - dual_gap = (nll_scaled - dual_diff + l1_term).abs() # (b) + ) # (B) + dual_gap = (nll_scaled - dual_diff + l1_term).abs() # (B) # Compute proximal gradient descent step - c_next = coefficients + step_sizes * (ngrad - lambdas) # (b, m, l - w + 1) + c_next = coefficients + step_sizes * (ngrad - lambdas) # (B, M, P) c_next = F.relu(c_next) # Ensure non-negativity constraint return c_next, dual_gap, nll @@ -128,7 +129,7 @@ def optimizer_step( coef: Float[Tensor, "B M P"], i: Float[Tensor, "B 1 1"], step_sizes: Float[Tensor, "B 1 1"], - sequence_length: int, + L: int, lambdas: Float[Tensor, "1 M 1"], ) -> Tuple[ Float[Tensor, "B M P"], @@ -142,8 +143,12 @@ def optimizer_step( to improve convergence speed while maintaining the non-negative constraint on coefficients. - B = batch size, M = number of motifs, L = sequence length, W = motif width. - P = L - W + 1 (the number of positions with coefficients). + Dimension notation: + - B = batch size (number of regions processed simultaneously) + - M = number of motifs + - L = sequence length + - W = motif width (length of each motif) + - P = L - W + 1 (number of valid motif positions) Parameters ---------- @@ -163,7 +168,7 @@ def optimizer_step( Iteration counter for each batch element. step_sizes : Float[Tensor, "B 1 1"] Step sizes for optimization. - sequence_length : int + L : int Sequence length for normalization. lambdas : Float[Tensor, "1 M 1"] Regularization parameters. @@ -171,13 +176,13 @@ def optimizer_step( Returns ------- coef_inter : Float[Tensor, "B M P"] - Updated intermediate coefficients with momentum. 
+ Updated intermediate coefficients with momentum (shape: batch_size × motifs × positions). coef : Float[Tensor, "B M P"] - Updated coefficient matrix. + Updated coefficient matrix (shape: batch_size × motifs × positions). gap : Float[Tensor, " B"] - Normalized duality gap. + Normalized duality gap (shape: batch_size). nll : Float[Tensor, " B"] - Normalized negative log likelihood. + Normalized negative log likelihood (shape: batch_size). Notes ----- @@ -195,8 +200,8 @@ def optimizer_step( coef, gap, nll = prox_grad_step( coef_inter, importance_scale, cwms, contribs, sequences, lambdas, step_sizes ) - gap = gap / sequence_length - nll = nll / (2 * sequence_length) + gap = gap / L + nll = nll / (2 * L) # Compute updated coefficients with Nesterov momentum mom_term = i / (i + 3.0) @@ -250,7 +255,10 @@ class BatchLoaderBase(ABC): This class provides common functionality for different input formats including batch indexing and padding for consistent batch sizes. - N = number of sequences, L = sequence length. + Dimension notation: + - N = number of sequences/regions in dataset + - L = sequence length + - B = batch size (number of regions processed simultaneously) Parameters ---------- @@ -258,7 +266,7 @@ class BatchLoaderBase(ABC): Contribution scores array. sequences : Int[Tensor, "N 4 L"] One-hot encoded sequences array. - sequence_length : int + L : int Sequence length. device : torch.device Target device for tensor operations. @@ -268,12 +276,12 @@ def __init__( self, contribs: Union[Float[Tensor, "N 4 L"], Float[Tensor, "N L"]], sequences: Int[Tensor, "N 4 L"], - sequence_length: int, + L: int, device: torch.device, ) -> None: self.contribs = contribs self.sequences = sequences - self.sequence_length = sequence_length + self.L = L self.device = device def _get_inds_and_pad_lens( @@ -291,13 +299,13 @@ def _get_inds_and_pad_lens( Returns ------- inds : Int[Tensor, " Z"] - Padded indices tensor with -1 for padding positions. 
+ Padded indices tensor with -1 for padding positions (shape: padded_batch_size). pad_lens : tuple Padding specification for F.pad (left, right, top, bottom, front, back). """ - n = end - start + N = end - start end = min(end, self.contribs.shape[0]) - overhang = n - (end - start) + overhang = N - (end - start) pad_lens = (0, 0, 0, 0, 0, overhang) inds = F.pad( @@ -314,7 +322,9 @@ def load_batch( ]: """Load a batch of data. - B = batch size, L = sequence length. + Dimension notation: + - B = batch size (number of regions in this batch) + - L = sequence length Parameters ---------- @@ -326,11 +336,11 @@ def load_batch( Returns ------- contribs_batch : Float[Tensor, "B 4 L"] - Batch of contribution scores. + Batch of contribution scores (shape: batch_size × 4_bases × L). sequences_batch : Union[Int[Tensor, "B 4 L"], int] - Batch of sequences or scalar for hypothetical mode. - inds_batch : Int[Tensor, "B"] - Batch indices. + Batch of one-hot encoded sequences (shape: batch_size × 4_bases × L) or scalar 1 for hypothetical mode. + inds_batch : Int[Tensor, " B"] + Batch indices mapping to original sequence indices (shape: batch_size). 
Notes ----- @@ -356,12 +366,12 @@ def load_batch( contribs_batch = _to_channel_last_layout( contribs_compact, device=self.device, dtype=torch.float32 ) - sequences_batch = F.pad(self.sequences[start:end, :, :], pad_lens) # (b, 4, l) + sequences_batch = F.pad(self.sequences[start:end, :, :], pad_lens) # (B, 4, L) sequences_batch = _to_channel_last_layout( sequences_batch, device=self.device, dtype=torch.int8 ) - contribs_batch = contribs_batch * sequences_batch # (b, 4, l) + contribs_batch = contribs_batch * sequences_batch # (B, 4, L) return contribs_batch, sequences_batch, inds @@ -382,7 +392,7 @@ def load_batch( contribs_hyp = _to_channel_last_layout( contribs_hyp, device=self.device, dtype=torch.float32 ) - sequences_batch = F.pad(self.sequences[start:end, :, :], pad_lens) # (b, 4, l) + sequences_batch = F.pad(self.sequences[start:end, :, :], pad_lens) # (B, 4, L) sequences_batch = _to_channel_last_layout( sequences_batch, device=self.device, dtype=torch.int8 ) @@ -440,20 +450,24 @@ def fit_contribs( Parameters ---------- cwms : Float[ndarray, "M 4 W"] - Motif contribution weight matrices where M = number of motifs, - 4 = DNA bases (A, C, G, T), W = motif width. + Motif contribution weight matrices where: + - M = number of motifs (transcription factor binding patterns) + - 4 = DNA bases (A, C, G, T dimensions) + - W = motif width (length of each motif pattern) contribs : Float[ndarray, "N 4 L"] | Float[ndarray, "N L"] - Neural network contribution scores where N = number of regions, - L = sequence length. Can be hypothetical (N, 4, L) or projected (N, L). - sequences : Float[ndarray, "N 4 L"] - One-hot encoded DNA sequences. + Neural network contribution scores where: + - N = number of regions in dataset + - L = sequence length + Can be hypothetical (N, 4, L) or projected (N, L) format. + sequences : Int[ndarray, "N 4 L"] + One-hot encoded DNA sequences (shape: num_regions × 4_bases × L). 
cwm_trim_mask : Float[ndarray, "M W"] - Binary mask indicating which positions of each CWM to use. + Binary mask indicating which positions of each CWM to use (shape: num_motifs × motif_width). use_hypothetical : bool Whether to use hypothetical contribution scores (True) or projected scores (False). - lambdas : Float[ndarray, "M"] - L1 regularization weights for each motif. + lambdas : Float[ndarray, " M"] + L1 regularization weights for each motif (shape: num_motifs). step_size_max : float, default 3.0 Maximum optimization step size. step_size_min : float, default 0.08 @@ -531,10 +545,10 @@ def fit_contribs( ... compile_optimizer=False ... ) """ - m, _, w = cwms.shape - n, _, sequence_length = sequences.shape + M, _, W = cwms.shape + N, _, L = sequences.shape - b = batch_size + B = batch_size # Using uppercase for consistency with dimension notation if device is None: if torch.cuda.is_available(): @@ -574,13 +588,9 @@ def fit_contribs( # Initialize batch loader if len(contribs_tensor.shape) == 3: if use_hypothetical: - batch_loader = BatchLoaderHyp( - contribs_tensor, sequences_tensor, sequence_length, device - ) + batch_loader = BatchLoaderHyp(contribs_tensor, sequences_tensor, L, device) else: - batch_loader = BatchLoaderProj( - contribs_tensor, sequences_tensor, sequence_length, device - ) + batch_loader = BatchLoaderProj(contribs_tensor, sequences_tensor, L, device) elif len(contribs_tensor.shape) == 2: if use_hypothetical: raise ValueError( @@ -588,7 +598,7 @@ def fit_contribs( ) else: batch_loader = BatchLoaderCompactFmt( - contribs_tensor, sequences_tensor, sequence_length, device + contribs_tensor, sequences_tensor, L, device ) else: raise ValueError( @@ -612,21 +622,21 @@ def fit_contribs( # Initialize buffers for optimizer coef_inter: Float[Tensor, "B M P"] = torch.zeros( - (b, m, sequence_length - w + 1) - ) # (b, m, sequence_length - w + 1) + (B, M, L - W + 1) + ) # (B, M, P) where P = L - W + 1 coef_inter = _to_channel_last_layout(coef_inter, 
device=device, dtype=torch.float32) coef: Float[Tensor, "B M P"] = torch.zeros_like(coef_inter) - i: Float[Tensor, "B 1 1"] = torch.zeros((b, 1, 1), dtype=torch.int, device=device) + i: Float[Tensor, "B 1 1"] = torch.zeros((B, 1, 1), dtype=torch.int, device=device) step_sizes: Float[Tensor, "B 1 1"] = torch.full( - (b, 1, 1), step_size_max, dtype=torch.float32, device=device + (B, 1, 1), step_size_max, dtype=torch.float32, device=device ) converged: Bool[Tensor, " B"] = torch.full( - (b,), True, dtype=torch.bool, device=device + (B,), True, dtype=torch.bool, device=device ) - num_load = b + num_load = B - contribs_buf: Float[Tensor, "B 4 L"] = torch.zeros((b, 4, sequence_length)) + contribs_buf: Float[Tensor, "B 4 L"] = torch.zeros((B, 4, L)) contribs_buf = _to_channel_last_layout( contribs_buf, device=device, dtype=torch.float32 ) @@ -635,25 +645,23 @@ def fit_contribs( if use_hypothetical: seqs_buf = 1 else: - seqs_buf = torch.zeros((b, 4, sequence_length)) + seqs_buf = torch.zeros((B, 4, L)) seqs_buf = _to_channel_last_layout(seqs_buf, device=device, dtype=torch.int8) - importance_scale_buf: Float[Tensor, "B M P"] = torch.zeros( - (b, m, sequence_length - w + 1) - ) + importance_scale_buf: Float[Tensor, "B M P"] = torch.zeros((B, M, L - W + 1)) importance_scale_buf = _to_channel_last_layout( importance_scale_buf, device=device, dtype=torch.float32 ) - inds_buf: Int[Tensor, " B"] = torch.zeros((b,), dtype=torch.int, device=device) + inds_buf: Int[Tensor, " B"] = torch.zeros((B,), dtype=torch.int, device=device) global_scale_buf: Float[Tensor, " B"] = torch.zeros( - (b,), dtype=torch.float, device=device + (B,), dtype=torch.float, device=device ) - with tqdm(disable=None, unit="regions", total=n, ncols=120) as pbar: + with tqdm(disable=None, unit="regions", total=N, ncols=120) as pbar: num_complete = 0 next_ind = 0 - while num_complete < n: + while num_complete < N: # Retire converged peaks and fill buffer with new data if num_load > 0: load_start = next_ind @@ 
-666,9 +674,7 @@ def fit_contribs( if sqrt_transform: contribs_batch = _signed_sqrt(contribs_batch) - global_scale_batch = ( - (contribs_batch**2).sum(dim=(1, 2)) / sequence_length - ).sqrt() + global_scale_batch = ((contribs_batch**2).sum(dim=(1, 2)) / L).sqrt() contribs_batch = torch.nan_to_num( contribs_batch / global_scale_batch[:, None, None] ) @@ -703,7 +709,7 @@ def fit_contribs( coef, i, step_sizes, - sequence_length, + L, lambdas_tensor, ) i += 1 From 2c9154fbe4bd3df4f247da4fc1cf5e347a3fe54d Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Tue, 9 Sep 2025 13:51:42 -0700 Subject: [PATCH 33/39] Ensure that hits_unique is well-defined --- README.md | 2 +- src/finemo/data_io.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1b1ba64..72e25df 100644 --- a/README.md +++ b/README.md @@ -198,7 +198,7 @@ Usage: `finemo call-hits -r -m -o [-p ] - `peak_name`: The name of the peak region containing the hit, taken from the `name` field of the input peak data. `NA` if peak coordinates are not provided. - `peak_id`: The numerical index of the peak region containing the hit. -`hits_unique.tsv`: A deduplicated list of hits in the same format as `hits.tsv`. In cases where peak regions overlap, `hits.tsv` may list multiple instances of a hit, each linked to a different peak. `hits_unique.tsv` arbitrarily selects one instance per duplicated hit. This file is generated only if peak coordinates are provided. +`hits_unique.tsv`: A deduplicated list of hits in the same format as `hits.tsv`. In cases where peak regions overlap, `hits.tsv` may list multiple instances of a hit, each linked to a different peak. `hits_unique.tsv` arbitrarily selects one instance per duplicated hit. **This file is empty if peak coordinates are not provided.** `hits.bed`: A coordinate-sorted BED file of unique hits. 
It includes: diff --git a/src/finemo/data_io.py b/src/finemo/data_io.py index d266178..8369108 100644 --- a/src/finemo/data_io.py +++ b/src/finemo/data_io.py @@ -1233,8 +1233,11 @@ def write_hits( ----- Creates three output files: - hits.tsv: Complete hit data with all instances - - hits_unique.tsv: Deduplicated hits by genomic position and motif + - hits_unique.tsv: Deduplicated hits by genomic position and motif (excludes rows with NA chromosome coordinates) - hits.bed: BED format file for genome browser visualization + + Rows where the chromosome field is NA are filtered out during deduplication + to ensure that data_unique only contains well-defined genomic coordinates. """ os.makedirs(out_dir, exist_ok=True) out_path_tsv = os.path.join(out_dir, "hits.tsv") @@ -1275,7 +1278,7 @@ def write_hits( .select(HITS_DTYPES.keys()) ) - data_unique = data_all.unique( + data_unique = data_all.filter(pl.col("chr").is_not_null()).unique( subset=["chr", "start", "motif_name", "strand"], maintain_order=True ) From 821bcb2756d31eac424934342cf4bd2baee1c485 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Tue, 9 Sep 2025 16:27:36 -0700 Subject: [PATCH 34/39] Remove unneeded guard --- src/finemo/main.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/finemo/main.py b/src/finemo/main.py index 3ffb2af..db78241 100644 --- a/src/finemo/main.py +++ b/src/finemo/main.py @@ -1345,11 +1345,6 @@ def cli() -> None: ) elif args.cmd == "report": - if args.no_recall and not args.no_seqlets: - raise ValueError( - "The `--no-seqlets` flag must be set in conjunction with `--no-recall`." 
- ) - report( args.regions, args.hits, From 55cb36a3527a7de5089150ac122c3fcda754e153 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Wed, 10 Sep 2025 18:28:21 -0700 Subject: [PATCH 35/39] Improve motif name sorting logic --- src/finemo/data_io.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/finemo/data_io.py b/src/finemo/data_io.py index 8369108..6dc6074 100644 --- a/src/finemo/data_io.py +++ b/src/finemo/data_io.py @@ -707,7 +707,7 @@ def softmax(x: Float[ndarray, "4 W"], temp: float = 100) -> Float[ndarray, "4 W" return exp / np.sum(exp, axis=0, keepdims=True) -def _motif_name_sort_key(data: Tuple[str, Any]) -> Union[Tuple[int], Tuple[int, str]]: +def _motif_name_sort_key(data: Tuple[str, Any]) -> Union[Tuple[int, int], Tuple[int, str]]: """Generate sort key for TF-MoDISco motif names. This function creates a sort key that orders motifs by pattern number, @@ -717,25 +717,27 @@ def _motif_name_sort_key(data: Tuple[str, Any]) -> Union[Tuple[int], Tuple[int, ---------- data : Tuple[str, Any] Tuple containing motif name as first element and additional data. - The motif name should follow the format 'pattern_N' where N is an integer. + The motif name should follow the format 'pattern_N' or 'pattern#N' where N is an integer. Returns ------- - Union[Tuple[int], Tuple[int, str]] + Union[Tuple[int, int], Tuple[int, str]] Sort key tuple for ordering motifs. Standard pattern names return - (pattern_number,) while non-standard names return (-1, name). + (0, pattern_number) while non-standard names return (1, name). Notes ----- This function is used internally by load_modisco_motifs to ensure consistent motif ordering across runs. 
""" - name = data[0] - if name.startswith("pattern_"): - pattern_num = int(name.split("_")[-1]) - return (pattern_num,) - else: - return (-1, name) # Mixed tuple types for sorting + pattern_name = data[0] + try: + return (0, int(pattern_name.split("_")[-1])) + except (ValueError, IndexError): + try: + return (0, int(pattern_name.split("#")[-1])) + except (ValueError, IndexError): + return (1, pattern_name) MODISCO_PATTERN_GROUPS = ["pos_patterns", "neg_patterns"] @@ -1036,10 +1038,7 @@ def load_modisco_seqlets( metacluster = modisco_results[name] - def get_pattern_number(x): - return int(x[0].split("_")[-1]) - - key = get_pattern_number + key = _motif_name_sort_key for _, (pattern_name, pattern) in enumerate( sorted(metacluster.items(), key=key) # type: ignore # HDF5 access ): From 56f21f4784195975092a55652930f9442e1f1093 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Thu, 11 Sep 2025 04:51:03 -0700 Subject: [PATCH 36/39] Properly encode report URLs --- src/finemo/templates/report.html | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/finemo/templates/report.html b/src/finemo/templates/report.html index a2389d1..c3a6ffc 100644 --- a/src/finemo/templates/report.html +++ b/src/finemo/templates/report.html @@ -322,13 +322,13 @@

Motif-specific hit and seqlet analysis

{{ item.num_seqlets_only }} {{ item.num_hits_restricted_only }} {% endif %} - - - - + + + + {% if compute_recall %} - - + + {% endif %} {% endfor %} @@ -389,10 +389,10 @@

Motif-specific hit quality metrics

{% for m in motif_names %} {{ m }} - - - - + + + + {% endfor %} From ed38c9dad392a765200ba3957635e7b22fea6d97 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 14 Sep 2025 07:41:36 -0700 Subject: [PATCH 37/39] Histogram edge case fallback --- src/finemo/visualization.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/finemo/visualization.py b/src/finemo/visualization.py index 74e4aa2..d331cb9 100644 --- a/src/finemo/visualization.py +++ b/src/finemo/visualization.py @@ -112,7 +112,10 @@ def plot_hit_stat_distributions( fig, ax = plt.subplots(figsize=(5, 2)) # Plot coefficient distribution - ax.hist(coefficients, bins=50, density=True) + try: + ax.hist(coefficients, bins=50, density=True) + except ValueError: + ax.hist(coefficients, bins=1, density=True) output_path_png = os.path.join(motifs_dir, f"{m}_coefficients.png") plt.savefig(output_path_png, dpi=300) @@ -123,7 +126,10 @@ def plot_hit_stat_distributions( fig, ax = plt.subplots(figsize=(5, 2)) # Plot similarity distribution - ax.hist(similarities, bins=50, density=True) + try: + ax.hist(similarities, bins=50, density=True) + except ValueError: + ax.hist(similarities, bins=1, density=True) output_path_png = os.path.join(motifs_dir, f"{m}_similarities.png") plt.savefig(output_path_png, dpi=300) @@ -134,7 +140,10 @@ def plot_hit_stat_distributions( fig, ax = plt.subplots(figsize=(5, 2)) # Plot importance distribution - ax.hist(importances, bins=50, density=True) + try: + ax.hist(importances, bins=50, density=True) + except ValueError: + ax.hist(importances, bins=1, density=True) output_path_png = os.path.join(motifs_dir, f"{m}_importances.png") plt.savefig(output_path_png, dpi=300) From 9df34e83255401acc571926cb56e639d5aa1071d Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 14 Sep 2025 16:59:33 -0700 Subject: [PATCH 38/39] README typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 72e25df..115d397 100644 
--- a/README.md +++ b/README.md @@ -304,4 +304,4 @@ Usage: `finemo intersect-hits -i -o [-r]` - `-i/--hits`: The path to one or more input hits file. This should be the `hits.tsv` or `hits_unique.tsv` file generated by the `finemo call-hits` command. - `-o/--out-path`: The path to the output file. Reoccuring columns are suffixed with the positional index of the input file (e.g. `hit_importance_1`), with the exception of index 0. -- `-r/--relaxed`: By default, the intersection assumes consistent input region definitions (name and coordinates) and motif trimming across runs. In contrast, this relaxed intersection criteria uses only motif names and untrimmed hit coordinates. However, this is not suitable when hit genomic coordinates are unknown (e.g. when using `finemo call-hits` with `-p/--peaks`). Default is False. +- `-r/--relaxed`: By default, the intersection assumes consistent input region definitions (name and coordinates) and motif trimming across runs. In contrast, this relaxed intersection criteria uses only motif names and untrimmed hit coordinates. However, this is not suitable when hit genomic coordinates are unknown. Default is False. From d5429df9237515f3af743cbce871e354fa308759 Mon Sep 17 00:00:00 2001 From: Austin Wang Date: Sun, 14 Sep 2025 17:01:37 -0700 Subject: [PATCH 39/39] Install torch from pip --- environment.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/environment.yml b/environment.yml index 3720c11..0de69bf 100644 --- a/environment.yml +++ b/environment.yml @@ -1,12 +1,8 @@ channels: - - pytorch - - nvidia - conda-forge - bioconda - nodefaults dependencies: - - pytorch=2.5.1 - - pytorch-cuda=12.4 - python=3.11 - numba=0.61.2 - numpy=2.2.0 @@ -19,4 +15,7 @@ dependencies: - jinja2=3.1.4 - pybigwig=0.3.23 - pyfaidx=0.8.1.3 - - jaxtyping=0.3.2 \ No newline at end of file + - jaxtyping=0.3.2 + - pip=25.2 + - pip: + - torch==2.5.1 \ No newline at end of file