bxlab · nsoranzo · Jul 29, 2025 · Jul 24, 2025 · Jul 24, 2025 · Jul 24, 2025
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,5 @@
+[flake8]
+ignore = E203,E501,E701,E704,E741,W503
+exclude = .git,.tox,.venv,build,doc/source/conf.py
+import-order-style = smarkets
+application-import-names = bx,bx_extras
diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
@@ -32,11 +32,9 @@ jobs:
         run: python -m cibuildwheel --output-dir dist
         env:
           CIBW_ARCHS: ${{ matrix.archs }}
-          # Skip building musllinux wheels for the CPython versions for which the
-          # numpy version we build against doesn't have musllinux wheels on PyPI.
-          # Skip building for PyPy 3.8, which is deprecated upstream.
-          # Skip building for PyPy on i686 since NumPy 2.0 fails to build on it.
-          CIBW_SKIP: "cp38-musllinux_* pp38-* pp*-manylinux_i686"
+          CIBW_ENABLE: "pypy"
+          # Skip building for PyPy 3.10; see https://github.com/pypa/cibuildwheel/issues/2518
+          CIBW_SKIP: "pp310-*"
       - name: Check packages
         run: twine check dist/*
       - uses: actions/upload-artifact@v4

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.8', '3.13']
+        python-version: ['3.9', '3.14']
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
@@ -24,7 +24,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5

diff --git a/LICENSE b/LICENSE
@@ -1,5 +1,6 @@
 Copyright (c) 2005-2015 The Pennsylvania State University
-Copyright (c) 2013-2020 The Johns Hopkins University
+Copyright (c) 2013-2019 The Johns Hopkins University
+Copyright (c) 2019-2025 Earlham Institute
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -25,7 +25,7 @@
 ]
 
 templates_path = ["_templates"]
-exclude_patterns = []
+exclude_patterns = ()
 
 
 # -- Options for HTML output -------------------------------------------------

diff --git a/lib/bx/align/axt.py b/lib/bx/align/axt.py
@@ -128,7 +128,7 @@ def __init__(self, file, attributes=None):
 
     def write(self, alignment):
         if len(alignment.components) != 2:
-            raise ValueError("%d-component alignment is not compatible with axt" % len(alignment.components))
+            raise ValueError(f"{len(alignment.components)}-component alignment is not compatible with axt")
         c1 = alignment.components[0]
         c2 = alignment.components[1]
 
@@ -143,18 +143,7 @@ def write(self, alignment):
             chr1, chr2 = c1.src, c2.src
 
         self.file.write(
-            "%d %s %d %d %s %d %d %s %s\n"
-            % (
-                self.block,
-                chr1,
-                c1.start + 1,
-                c1.start + c1.size,
-                chr2,
-                c2.start + 1,
-                c2.start + c2.size,
-                c2.strand,
-                alignment.score,
-            )
+            f"{self.block} {chr1} {c1.start + 1} {c1.start + c1.size} {chr2} {c2.start + 1} {c2.start + c2.size} {c2.strand} {alignment.score}\n"
         )
         self.file.write(f"{c1.text}\n")
         self.file.write(f"{c2.text}\n")

diff --git a/lib/bx/align/core.py b/lib/bx/align/core.py
@@ -250,24 +250,11 @@ def __init__(self, src="", start=0, size=0, strand=None, src_size=None, text="")
 
     def __str__(self):
         if self.empty:
-            rval = "e %s %d %d %s %d %s" % (
-                self.src,
-                self.start,
-                self.size,
-                self.strand,
-                self.src_size,
-                self.synteny_empty,
-            )
+            rval = f"e {self.src} {self.start} {self.size} {self.strand} {self.src_size} {self.synteny_empty}"
         else:
-            rval = "s %s %d %d %s %d %s" % (self.src, self.start, self.size, self.strand, self.src_size, self.text)
+            rval = f"s {self.src} {self.start} {self.size} {self.strand} {self.src_size} {self.text}"
             if self.synteny_left and self.synteny_right:
-                rval += "\ni %s %s %d %s %d" % (
-                    self.src,
-                    self.synteny_left[0],
-                    self.synteny_left[1],
-                    self.synteny_right[0],
-                    self.synteny_right[1],
-                )
+                rval += f"\ni {self.src} {self.synteny_left[0]} {self.synteny_left[1]} {self.synteny_right[0]} {self.synteny_right[1]}"
         return rval
 
     def get_end(self):
@@ -382,7 +369,7 @@ def coord_to_col(self, pos):
             raise ValueError("There is no column index. It is empty.")
         start, end = self.get_forward_strand_start(), self.get_forward_strand_end()
         if pos < start or pos > end:
-            raise ValueError("Range error: %d not in %d-%d" % (pos, start, end))
+            raise ValueError(f"Range error: {pos} not in {start}-{end}")
         if not self.index:
             self.index = []
             if self.strand == "-":

diff --git a/lib/bx/align/epo.py b/lib/bx/align/epo.py
@@ -97,7 +97,7 @@ def _make_from_epo(cls, trg_comp, qr_comp, trg_chrom_sizes, qr_chrom_sizes):
             else:
                 break
         S.append(min(a[1], b[1]) - max(a[0], b[0]))
-        assert len(T) == len(Q) == len(S) - 1, "(S, T, Q) = (%d, %d, %d)" % tuple(map(len, (S, T, Q)))
+        assert len(T) == len(Q) == len(S) - 1, f"(S, T, Q) = ({len(S)}, {len(T)}, {len(Q)})"
 
         tSize = trg_chrom_sizes[trg_comp.chrom]
         qSize = qr_chrom_sizes[qr_comp.chrom]
@@ -138,16 +138,12 @@ def _make_from_epo(cls, trg_comp, qr_comp, trg_chrom_sizes, qr_chrom_sizes):
         if chain.qStrand == "-":
             chain = chain._replace(qEnd=chain.qSize - chain.qStart, qStart=chain.qSize - chain.qEnd)
 
-        assert chain.tEnd - chain.tStart == sum(S) + sum(T), "[%s] %d != %d" % (
-            str(chain),
-            chain.tEnd - chain.tStart,
-            sum(S) + sum(T),
-        )
-        assert chain.qEnd - chain.qStart == sum(S) + sum(Q), "[%s] %d != %d" % (
-            str(chain),
-            chain.qEnd - chain.qStart,
-            sum(S) + sum(Q),
-        )
+        assert chain.tEnd - chain.tStart == sum(S) + sum(
+            T
+        ), f"[{str(chain)}] {chain.tEnd - chain.tStart} != {sum(S) + sum(T)}"
+        assert chain.qEnd - chain.qStart == sum(S) + sum(
+            Q
+        ), f"[{str(chain)}] {chain.qEnd - chain.qStart} != {sum(S) + sum(Q)}"
         return chain, S, T, Q
 
     def slice(self, who):
@@ -223,7 +219,7 @@ def __repr__(self):
 
     def __str__(self):
         c = self.cigar[:5] + "..." + self.cigar[-5:]
-        return "(%s %s %s %d %d %s %s)" % tuple(self[:6] + (c,))
+        return "({} {} {} {} {} {} {})".format(*tuple(self[:6] + (c,)))
 
     @classmethod
     def _strfactory(cls, line):
@@ -318,10 +314,7 @@ def intervals(self, reverse, thr=0):
         assert sum(t[0] for t in self.cigar_iter(False) if t[1] == "M") == sum(t[1] - t[0] for t in d)
 
         d_sum = sum(t[1] - t[0] for t in d)
-        assert self.end - self.start + 1 == d_sum, "[ (%d, %d) = %d ] != %d" % (
-            self.start,
-            self.end,
-            self.end - self.start + 1,
-            d_sum,
-        )
+        assert (
+            self.end - self.start + 1 == d_sum
+        ), f"[ ({self.start}, {self.end}) = {self.end - self.start + 1} ] != {d_sum}"
         return d[1:]  # clip the (thr, thr) entry
diff --git a/lib/bx/align/epo_tests.py b/lib/bx/align/epo_tests.py
@@ -30,7 +30,7 @@ def test_ci(self):
         for i in range(self.N):
             assert C[i, 1] - C[i, 0] == S[i]
         for i in range(1, self.N):
-            assert C[i, 0] - C[i - 1, 1] == D[i - 1], "[%d] %d != %d" % (i, C[i, 0] - C[i - 1, 1], D[i - 1])
+            assert C[i, 0] - C[i - 1, 1] == D[i - 1], f"[{i}] {C[i, 0] - C[i - 1, 1]} != {D[i - 1]}"
 
     def test_elem_u(self):
         # back to back, so should return a single interval
@@ -141,7 +141,7 @@ def toCigar(species, id, s):
         C.append(dc + mc)
     MSUM = sum(i[1] - i[0] for i in I)
     start = random.randint(50, 10000)
-    return "%s\t%d\t1\t%d\t%d\t%d\t%s" % (species, id, start, start + MSUM - 1, random.choice((-1, 1)), "".join(C))
+    return "{}\t{}\t1\t{}\t{}\t{}\t{}".format(species, id, start, start + MSUM - 1, random.choice((-1, 1)), "".join(C))
 
 
 class TestEpo(unittest.TestCase):
@@ -207,10 +207,10 @@ def test_rem_dash(self):
             qStart = random.randint(0, 1000)
             epo_pair = (
                 EPOitem._strfactory(
-                    "homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart + 12 - 1, "4M2D4M%dD4M" % (dash_cols + 3))
+                    "homo_sapiens\t0\t1\t{}\t{}\t1\t{}".format(tStart, tStart + 12 - 1, f"4M2D4M{dash_cols + 3}D4M")
                 ),
                 EPOitem._strfactory(
-                    "mus_musculus\t0\t1\t%d\t%d\t1\t%s" % (qStart, qStart + 14 - 1, "7M%dD7M" % (dash_cols + 3))
+                    "mus_musculus\t0\t1\t{}\t{}\t1\t{}".format(qStart, qStart + 14 - 1, f"7M{dash_cols + 3}D7M")
                 ),
             )
             chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800})
@@ -236,19 +236,20 @@ def test_rem_dash(self):
 
             epo_pair = (
                 EPOitem._strfactory(
-                    "homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart + tm - 1, "%dD%dM" % (dash_cols + 1, tm))
+                    "homo_sapiens\t0\t1\t{}\t{}\t1\t{}".format(tStart, tStart + tm - 1, f"{dash_cols + 1}D{tm}M")
                 ),
                 EPOitem._strfactory(
-                    "mus_musculus\t0\t1\t%d\t%d\t1\t%s"
-                    % (qStart, qStart + qm + 1 - 1, "M%dD%dM" % (dash_cols + tm - qm, qm))
+                    "mus_musculus\t0\t1\t{}\t{}\t1\t{}".format(
+                        qStart, qStart + qm + 1 - 1, f"M{dash_cols + tm - qm}D{qm}M"
+                    )
                 ),
             )
             chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800})
             if chain[1][-1] != qm:
                 pdb.set_trace()
             assert chain[1][-1] == qm
             # correct also for coordinate interpretation differences between UCSC and EPO
-            assert (qStart + 1) - 1 == chain[0].qStart, "%d != %d" % (qStart + 1, chain[0].qStart)
+            assert (qStart + 1) - 1 == chain[0].qStart, f"{qStart + 1} != {chain[0].qStart}"
 
 
 if __name__ == "__main__":

diff --git a/lib/bx/align/lav.py b/lib/bx/align/lav.py
@@ -61,7 +61,7 @@ def __next__(self):
                 continue
             if line == "#:eof":
                 line = self.file.readline().rstrip()
-                assert not line, 'extra line after #:eof (line %d, "%s")' % (self.lineNumber, line)
+                assert not line, f'extra line after #:eof (line {self.lineNumber}, "{line}")'
                 return None
             if line == "#:lav":
                 continue
@@ -80,7 +80,7 @@ def __next__(self):
             if line.endswith("{"):
                 self.parse_unknown_stanza()
                 continue
-            raise ValueError('incomprehensible line (line %d, "%s")' % (self.lineNumber, line))
+            raise ValueError(f'incomprehensible line (line {self.lineNumber}, "{line}")')
         return self.build_alignment(score, pieces)
 
     def __iter__(self):
@@ -161,7 +161,7 @@ def open_seqs(self):
         length2 = self.seq2_file.length
         assert (
             (species1 != species2) or (chrom1 != chrom2) or (length1 == length2)
-        ), "conflicting lengths for %s (%d and %d)" % (self.seq1_src, length1, length2)
+        ), f"conflicting lengths for {self.seq1_src} ({length1} and {length2})"
 
         self.species_to_lengths = {}
         self.species_to_lengths[species1] = {}
@@ -190,7 +190,7 @@ def parse_s_stanza(self):
         )
 
         line = self.fetch_line(report=" in s-stanza")
-        assert line == "}", 'improper s-stanza terminator (line %d, "%s")' % (self.lineNumber, line)
+        assert line == "}", f'improper s-stanza terminator (line {self.lineNumber}, "{line}")'
 
     def parse_s_seq(self, line):
         fields = line.split()
@@ -234,7 +234,7 @@ def parse_h_stanza(self):
             self.seq2_header = "seq2"
 
         line = self.fetch_line(report=" in h-stanza")
-        assert line == "}", 'improper h-stanza terminator (line %d, "%s")' % (self.lineNumber, line)
+        assert line == "}", f'improper h-stanza terminator (line {self.lineNumber}, "{line}")'
 
     def parse_a_stanza(self):
         """returns the pair (score,pieces)
@@ -243,7 +243,7 @@ def parse_a_stanza(self):
         # 's' line -- score, 1 field
         line = self.fetch_line(report=" in a-stanza")
         fields = line.split()
-        assert fields[0] == "s", 's line expected in a-stanza (line %d, "%s")' % (self.lineNumber, line)
+        assert fields[0] == "s", f's line expected in a-stanza (line {self.lineNumber}, "{line}")'
         try:
             score = int(fields[1])
         except ValueError:
@@ -252,12 +252,12 @@ def parse_a_stanza(self):
         # 'b' line -- begin positions in seqs, 2 fields
         line = self.fetch_line(report=" in a-stanza")
         fields = line.split()
-        assert fields[0] == "b", 'b line expected in a-stanza (line %d, "%s")' % (self.lineNumber, line)
+        assert fields[0] == "b", f'b line expected in a-stanza (line {self.lineNumber}, "{line}")'
 
         # 'e' line -- end positions in seqs, 2 fields
         line = self.fetch_line(report=" in a-stanza")
         fields = line.split()
-        assert fields[0] == "e", 'e line expected in a-stanza (line %d, "%s")' % (self.lineNumber, line)
+        assert fields[0] == "e", f'e line expected in a-stanza (line {self.lineNumber}, "{line}")'
 
         # 'l' lines
         pieces = []
@@ -276,7 +276,7 @@ def parse_a_stanza(self):
                 pctId = float(fields[5])
             assert length2 == length, "length mismatch in a-stanza"
             pieces.append((start1 + self.seq1_start, start2 + self.seq2_start, length, pctId))
-        assert line == "}", 'improper a-stanza terminator (line %d, "%s")' % (self.lineNumber, line)
+        assert line == "}", f'improper a-stanza terminator (line {self.lineNumber}, "{line}")'
         return (score, pieces)
 
     def parse_unknown_stanza(self):
@@ -298,7 +298,7 @@ def fetch_line(self, strip=True, requireLine=True, report=""):
             line = self.file.readline().strip().strip(strip)
         self.lineNumber += 1
         if requireLine:
-            assert line, "unexpected blank line or end of file%s (line %d)" % (report, self.lineNumber)
+            assert line, f"unexpected blank line or end of file{report} (line {self.lineNumber})"
         return line
 
     def d_stanza(self):
@@ -319,20 +319,8 @@ def s_stanza(self):
         else:
             seq2_strand = "0"
 
-        s = '  "%s" %d %d %s %d\n' % (
-            self.seq1_filename,
-            self.seq2_start + 1,
-            self.seq1_end,
-            seq1_strand,
-            self.seq1_contig,
-        )
-        s += '  "%s" %d %d %s %d\n' % (
-            self.seq2_filename,
-            self.seq2_start + 1,
-            self.seq2_end,
-            seq2_strand,
-            self.seq2_contig,
-        )
+        s = f'  "{self.seq1_filename}" {self.seq2_start + 1} {self.seq1_end} {seq1_strand} {self.seq1_contig}\n'
+        s += f'  "{self.seq2_filename}" {self.seq2_start + 1} {self.seq2_end} {seq2_strand} {self.seq2_contig}\n'
 
         return f"s {{\n{s}}}"
 
@@ -464,7 +452,7 @@ def __init__(self, file, attributes=None):
 
     def write(self, alignment):
         if len(alignment.components) != 2:
-            raise ValueError("%d-component alignment is not compatible with lav" % len(alignment.components))
+            raise ValueError(f"{len(alignment.components)}-component alignment is not compatible with lav")
 
         c1 = alignment.components[0]
         c2 = alignment.components[1]
@@ -502,8 +490,8 @@ def write_s_stanza(self):
         fname1 = build_filename(self.fname1, self.src1)
         fname2 = build_filename(self.fname2, self.src2)
         print("s {", file=self.file)
-        print('  "%s%s" 1 %d %d 1' % (fname1, strand1, self.length1, flag1), file=self.file)
-        print('  "%s%s" 1 %d %d 1' % (fname2, strand2, self.length2, flag2), file=self.file)
+        print(f'  "{fname1}{strand1}" 1 {self.length1} {flag1} 1', file=self.file)
+        print(f'  "{fname2}{strand2}" 1 {self.length2} {flag2} 1', file=self.file)
         print("}", file=self.file)
 
     def write_h_stanza(self):
@@ -565,10 +553,10 @@ def write_a_stanza(self, alignment):
 
         print("a {", file=self.file)
         print(f"  s {score}", file=self.file)
-        print("  b %d %d" % (start1 + 1, start2 + 1), file=self.file)
-        print("  e %d %d" % (end1, end2), file=self.file)
+        print(f"  b {start1 + 1} {start2 + 1}", file=self.file)
+        print(f"  e {end1} {end2}", file=self.file)
         for start1, start2, size, pctId in pieces:
-            print("  l %d %d %d %d %d" % (start1 + 1, start2 + 1, start1 + size, start2 + size, pctId), file=self.file)
+            print(f"  l {start1 + 1} {start2 + 1} {start1 + size} {start2 + size} {pctId}", file=self.file)
         print("}", file=self.file)
 
     def write_lav_marker(self):

diff --git a/lib/bx/align/score.py b/lib/bx/align/score.py
@@ -114,7 +114,7 @@ def __str__(self):
                 s = b
             else:
                 s = f"{ord(b):02X}"
-            line.append("%*s" % (width, s))
+            line.append(f"{s:>{width}}")
         lines.append(("".join(line)) + "\n")
         for a in self.alphabet1:
             line = []
@@ -129,7 +129,7 @@ def __str__(self):
                     s = f"{score:8.6f}"
                 else:
                     s = f"{score}"
-                line.append("%*s" % (width, s))
+                line.append(f"{s:>{width}}")
             lines.append(("".join(line)) + "\n")
         return "".join(lines)