diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..88e8c28 --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +ignore = E203,E501,E701,E704,E741,W503 +exclude = .git,.tox,.venv,build,doc/source/conf.py +import-order-style = smarkets +application-import-names = bx,bx_extras diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index a1f5f23..b5bef86 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -32,11 +32,9 @@ jobs: run: python -m cibuildwheel --output-dir dist env: CIBW_ARCHS: ${{ matrix.archs }} - # Skip building musllinux wheels for the CPython versions for which the - # numpy version we build against doesn't have musllinux wheels on PyPI. - # Skip building for PyPy 3.8, which is deprecated upstream. - # Skip building for PyPy on i686 since NumPy 2.0 fails to build on it. - CIBW_SKIP: "cp38-musllinux_* pp38-* pp*-manylinux_i686" + CIBW_ENABLE: "pypy" + # Skip building for PyPy 3.10 https://github.com/pypa/cibuildwheel/issues/2518 + CIBW_SKIP: "pp310-*" - name: Check packages run: twine check dist/* - uses: actions/upload-artifact@v4 diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f1f6981..dee179d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.13'] + python-version: ['3.9', '3.14'] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -24,7 +24,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/LICENSE b/LICENSE index 66baee1..b018c54 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,6 @@ Copyright (c) 2005-2015 The Pennsylvania State University -Copyright (c) 2013-2020 The Johns Hopkins University +Copyright (c) 2013-2019 The Johns Hopkins University +Copyright (c) 2019-2025 Earlham Institute Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/doc/source/conf.py b/doc/source/conf.py index b16bb3e..3d17469 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -25,7 +25,7 @@ ] templates_path = ["_templates"] -exclude_patterns = [] +exclude_patterns = () # -- Options for HTML output ------------------------------------------------- diff --git a/lib/bx/align/axt.py b/lib/bx/align/axt.py index 6526c2d..c0e180d 100644 --- a/lib/bx/align/axt.py +++ b/lib/bx/align/axt.py @@ -128,7 +128,7 @@ def __init__(self, file, attributes=None): def write(self, alignment): if len(alignment.components) != 2: - raise ValueError("%d-component alignment is not compatible with axt" % len(alignment.components)) + raise ValueError(f"{len(alignment.components)}-component alignment is not compatible with axt") c1 = alignment.components[0] c2 = alignment.components[1] @@ -143,18 +143,7 @@ def write(self, alignment): chr1, chr2 = c1.src, c2.src self.file.write( - "%d %s %d %d %s %d %d %s %s\n" - % ( - self.block, - chr1, - c1.start + 1, - c1.start + c1.size, - chr2, - c2.start + 1, - c2.start + c2.size, - c2.strand, - alignment.score, - ) + f"{self.block} {chr1} {c1.start + 1} {c1.start + c1.size} {chr2} {c2.start + 1} {c2.start + c2.size} {c2.strand} {alignment.score}\n" ) self.file.write(f"{c1.text}\n") self.file.write(f"{c2.text}\n") diff --git a/lib/bx/align/core.py b/lib/bx/align/core.py index 0ba1f3f..5637af7 100644 --- a/lib/bx/align/core.py +++ b/lib/bx/align/core.py @@ -250,24 +250,11 @@ def __init__(self, src="", start=0, size=0, strand=None, src_size=None, text="") def __str__(self): if self.empty: - rval = "e %s %d %d %s %d %s" % ( - self.src, - self.start, - self.size, - self.strand, - self.src_size, - self.synteny_empty, - ) + rval = f"e {self.src} {self.start} {self.size} {self.strand} {self.src_size} {self.synteny_empty}" else: - rval = "s %s %d %d %s %d %s" % (self.src, self.start, self.size, self.strand, self.src_size, self.text) + rval = f"s {self.src} {self.start} {self.size} {self.strand} {self.src_size} {self.text}" if self.synteny_left and self.synteny_right: - rval += "\ni %s %s %d %s %d" % ( - self.src, - self.synteny_left[0], - self.synteny_left[1], - self.synteny_right[0], - self.synteny_right[1], - ) + rval += f"\ni {self.src} {self.synteny_left[0]} {self.synteny_left[1]} {self.synteny_right[0]} {self.synteny_right[1]}" return rval def get_end(self): @@ -382,7 +369,7 @@ def coord_to_col(self, pos): raise ValueError("There is no column index. It is empty.") start, end = self.get_forward_strand_start(), self.get_forward_strand_end() if pos < start or pos > end: - raise ValueError("Range error: %d not in %d-%d" % (pos, start, end)) + raise ValueError(f"Range error: {pos} not in {start}-{end}") if not self.index: self.index = [] if self.strand == "-": diff --git a/lib/bx/align/epo.py b/lib/bx/align/epo.py index be1992d..e080802 100644 --- a/lib/bx/align/epo.py +++ b/lib/bx/align/epo.py @@ -97,7 +97,7 @@ def _make_from_epo(cls, trg_comp, qr_comp, trg_chrom_sizes, qr_chrom_sizes): else: break S.append(min(a[1], b[1]) - max(a[0], b[0])) - assert len(T) == len(Q) == len(S) - 1, "(S, T, Q) = (%d, %d, %d)" % tuple(map(len, (S, T, Q))) + assert len(T) == len(Q) == len(S) - 1, f"(S, T, Q) = ({len(S)}, {len(T)}, {len(Q)})" tSize = trg_chrom_sizes[trg_comp.chrom] qSize = qr_chrom_sizes[qr_comp.chrom] @@ -138,16 +138,12 @@ def _make_from_epo(cls, trg_comp, qr_comp, trg_chrom_sizes, qr_chrom_sizes): if chain.qStrand == "-": chain = chain._replace(qEnd=chain.qSize - chain.qStart, qStart=chain.qSize - chain.qEnd) - assert chain.tEnd - chain.tStart == sum(S) + sum(T), "[%s] %d != %d" % ( - str(chain), - chain.tEnd - chain.tStart, - sum(S) + sum(T), - ) - assert chain.qEnd - chain.qStart == sum(S) + sum(Q), "[%s] %d != %d" % ( - str(chain), - chain.qEnd - chain.qStart, - sum(S) + sum(Q), - ) + assert chain.tEnd - chain.tStart == sum(S) + sum( + T + ), f"[{str(chain)}] {chain.tEnd - chain.tStart} != {sum(S) + sum(T)}" + assert chain.qEnd - chain.qStart == sum(S) + sum( + Q + ), f"[{str(chain)}] {chain.qEnd - chain.qStart} != {sum(S) + sum(Q)}" return chain, S, T, Q def slice(self, who): @@ -223,7 +219,7 @@ def __repr__(self): def __str__(self): c = self.cigar[:5] + "..." + self.cigar[-5:] - return "(%s %s %s %d %d %s %s)" % tuple(self[:6] + (c,)) + return "({} {} {} {} {} {} {})".format(*tuple(self[:6] + (c,))) @classmethod def _strfactory(cls, line): @@ -318,10 +314,7 @@ def intervals(self, reverse, thr=0): assert sum(t[0] for t in self.cigar_iter(False) if t[1] == "M") == sum(t[1] - t[0] for t in d) d_sum = sum(t[1] - t[0] for t in d) - assert self.end - self.start + 1 == d_sum, "[ (%d, %d) = %d ] != %d" % ( - self.start, - self.end, - self.end - self.start + 1, - d_sum, - ) + assert ( + self.end - self.start + 1 == d_sum + ), f"[ ({self.start}, {self.end}) = {self.end - self.start + 1} ] != {d_sum}" return d[1:] # clip the (thr, thr) entry diff --git a/lib/bx/align/epo_tests.py b/lib/bx/align/epo_tests.py index 186cf9d..9c23a30 100644 --- a/lib/bx/align/epo_tests.py +++ b/lib/bx/align/epo_tests.py @@ -30,7 +30,7 @@ def test_ci(self): for i in range(self.N): assert C[i, 1] - C[i, 0] == S[i] for i in range(1, self.N): - assert C[i, 0] - C[i - 1, 1] == D[i - 1], "[%d] %d != %d" % (i, C[i, 0] - C[i - 1, 1], D[i - 1]) + assert C[i, 0] - C[i - 1, 1] == D[i - 1], f"[{i}] {C[i, 0] - C[i - 1, 1]} != {D[i - 1]}" def test_elem_u(self): # back to back, so should return a single interval @@ -141,7 +141,7 @@ def toCigar(species, id, s): C.append(dc + mc) MSUM = sum(i[1] - i[0] for i in I) start = random.randint(50, 10000) - return "%s\t%d\t1\t%d\t%d\t%d\t%s" % (species, id, start, start + MSUM - 1, random.choice((-1, 1)), "".join(C)) + return "{}\t{}\t1\t{}\t{}\t{}\t{}".format(species, id, start, start + MSUM - 1, random.choice((-1, 1)), "".join(C)) class TestEpo(unittest.TestCase): @@ -207,10 +207,10 @@ def test_rem_dash(self): qStart = random.randint(0, 1000) epo_pair = ( EPOitem._strfactory( - "homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart + 12 - 1, "4M2D4M%dD4M" % (dash_cols + 3)) + "homo_sapiens\t0\t1\t{}\t{}\t1\t{}".format(tStart, tStart + 12 - 1, f"4M2D4M{dash_cols + 3}D4M") ), EPOitem._strfactory( - "mus_musculus\t0\t1\t%d\t%d\t1\t%s" % (qStart, qStart + 14 - 1, "7M%dD7M" % (dash_cols + 3)) + "mus_musculus\t0\t1\t{}\t{}\t1\t{}".format(qStart, qStart + 14 - 1, f"7M{dash_cols + 3}D7M") ), ) chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800}) @@ -236,11 +236,12 @@ def test_rem_dash(self): epo_pair = ( EPOitem._strfactory( - "homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart + tm - 1, "%dD%dM" % (dash_cols + 1, tm)) + "homo_sapiens\t0\t1\t{}\t{}\t1\t{}".format(tStart, tStart + tm - 1, f"{dash_cols + 1}D{tm}M") ), EPOitem._strfactory( - "mus_musculus\t0\t1\t%d\t%d\t1\t%s" - % (qStart, qStart + qm + 1 - 1, "M%dD%dM" % (dash_cols + tm - qm, qm)) + "mus_musculus\t0\t1\t{}\t{}\t1\t{}".format( + qStart, qStart + qm + 1 - 1, f"M{dash_cols + tm - qm}D{qm}M" + ) ), ) chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800}) @@ -248,7 +249,7 @@ def test_rem_dash(self): pdb.set_trace() assert chain[1][-1] == qm # correct also for coordinate interpretation differences between UCSC and EPO - assert (qStart + 1) - 1 == chain[0].qStart, "%d != %d" % (qStart + 1, chain[0].qStart) + assert (qStart + 1) - 1 == chain[0].qStart, f"{qStart + 1} != {chain[0].qStart}" if __name__ == "__main__": diff --git a/lib/bx/align/lav.py b/lib/bx/align/lav.py index 234033f..f44c0f0 100644 --- a/lib/bx/align/lav.py +++ b/lib/bx/align/lav.py @@ -61,7 +61,7 @@ def __next__(self): continue if line == "#:eof": line = self.file.readline().rstrip() - assert not line, 'extra line after #:eof (line %d, "%s")' % (self.lineNumber, line) + assert not line, f'extra line after #:eof (line {self.lineNumber}, "{line}")' return None if line == "#:lav": continue @@ -80,7 +80,7 @@ def __next__(self): if line.endswith("{"): self.parse_unknown_stanza() continue - raise ValueError('incomprehensible line (line %d, "%s")' % (self.lineNumber, line)) + raise ValueError(f'incomprehensible line (line {self.lineNumber}, "{line}")') return self.build_alignment(score, pieces) def __iter__(self): @@ -161,7 +161,7 @@ def open_seqs(self): length2 = self.seq2_file.length assert ( (species1 != species2) or (chrom1 != chrom2) or (length1 == length2) - ), "conflicting lengths for %s (%d and %d)" % (self.seq1_src, length1, length2) + ), f"conflicting lengths for {self.seq1_src} ({length1} and {length2})" self.species_to_lengths = {} self.species_to_lengths[species1] = {} @@ -190,7 +190,7 @@ def parse_s_stanza(self): ) line = self.fetch_line(report=" in s-stanza") - assert line == "}", 'improper s-stanza terminator (line %d, "%s")' % (self.lineNumber, line) + assert line == "}", f'improper s-stanza terminator (line {self.lineNumber}, "{line}")' def parse_s_seq(self, line): fields = line.split() @@ -234,7 +234,7 @@ def parse_h_stanza(self): self.seq2_header = "seq2" line = self.fetch_line(report=" in h-stanza") - assert line == "}", 'improper h-stanza terminator (line %d, "%s")' % (self.lineNumber, line) + assert line == "}", f'improper h-stanza terminator (line {self.lineNumber}, "{line}")' def parse_a_stanza(self): """returns the pair (score,pieces) @@ -243,7 +243,7 @@ def parse_a_stanza(self): # 's' line -- score, 1 field line = self.fetch_line(report=" in a-stanza") fields = line.split() - assert fields[0] == "s", 's line expected in a-stanza (line %d, "%s")' % (self.lineNumber, line) + assert fields[0] == "s", f's line expected in a-stanza (line {self.lineNumber}, "{line}")' try: score = int(fields[1]) except ValueError: @@ -252,12 +252,12 @@ def parse_a_stanza(self): # 'b' line -- begin positions in seqs, 2 fields line = self.fetch_line(report=" in a-stanza") fields = line.split() - assert fields[0] == "b", 'b line expected in a-stanza (line %d, "%s")' % (self.lineNumber, line) + assert fields[0] == "b", f'b line expected in a-stanza (line {self.lineNumber}, "{line}")' # 'e' line -- end positions in seqs, 2 fields line = self.fetch_line(report=" in a-stanza") fields = line.split() - assert fields[0] == "e", 'e line expected in a-stanza (line %d, "%s")' % (self.lineNumber, line) + assert fields[0] == "e", f'e line expected in a-stanza (line {self.lineNumber}, "{line}")' # 'l' lines pieces = [] @@ -276,7 +276,7 @@ def parse_a_stanza(self): pctId = float(fields[5]) assert length2 == length, "length mismatch in a-stanza" pieces.append((start1 + self.seq1_start, start2 + self.seq2_start, length, pctId)) - assert line == "}", 'improper a-stanza terminator (line %d, "%s")' % (self.lineNumber, line) + assert line == "}", f'improper a-stanza terminator (line {self.lineNumber}, "{line}")' return (score, pieces) def parse_unknown_stanza(self): @@ -298,7 +298,7 @@ def fetch_line(self, strip=True, requireLine=True, report=""): line = self.file.readline().strip().strip(strip) self.lineNumber += 1 if requireLine: - assert line, "unexpected blank line or end of file%s (line %d)" % (report, self.lineNumber) + assert line, f"unexpected blank line or end of file{report} (line {self.lineNumber})" return line def d_stanza(self): @@ -319,20 +319,8 @@ def s_stanza(self): else: seq2_strand = "0" - s = ' "%s" %d %d %s %d\n' % ( - self.seq1_filename, - self.seq2_start + 1, - self.seq1_end, - seq1_strand, - self.seq1_contig, - ) - s += ' "%s" %d %d %s %d\n' % ( - self.seq2_filename, - self.seq2_start + 1, - self.seq2_end, - seq2_strand, - self.seq2_contig, - ) + s = f' "{self.seq1_filename}" {self.seq2_start + 1} {self.seq1_end} {seq1_strand} {self.seq1_contig}\n' + s += f' "{self.seq2_filename}" {self.seq2_start + 1} {self.seq2_end} {seq2_strand} {self.seq2_contig}\n' return f"s {{\n{s}}}" @@ -464,7 +452,7 @@ def __init__(self, file, attributes=None): def write(self, alignment): if len(alignment.components) != 2: - raise ValueError("%d-component alignment is not compatible with lav" % len(alignment.components)) + raise ValueError(f"{len(alignment.components)}-component alignment is not compatible with lav") c1 = alignment.components[0] c2 = alignment.components[1] @@ -502,8 +490,8 @@ def write_s_stanza(self): fname1 = build_filename(self.fname1, self.src1) fname2 = build_filename(self.fname2, self.src2) print("s {", file=self.file) - print(' "%s%s" 1 %d %d 1' % (fname1, strand1, self.length1, flag1), file=self.file) - print(' "%s%s" 1 %d %d 1' % (fname2, strand2, self.length2, flag2), file=self.file) + print(f' "{fname1}{strand1}" 1 {self.length1} {flag1} 1', file=self.file) + print(f' "{fname2}{strand2}" 1 {self.length2} {flag2} 1', file=self.file) print("}", file=self.file) def write_h_stanza(self): @@ -565,10 +553,10 @@ def write_a_stanza(self, alignment): print("a {", file=self.file) print(f" s {score}", file=self.file) - print(" b %d %d" % (start1 + 1, start2 + 1), file=self.file) - print(" e %d %d" % (end1, end2), file=self.file) + print(f" b {start1 + 1} {start2 + 1}", file=self.file) + print(f" e {end1} {end2}", file=self.file) for start1, start2, size, pctId in pieces: - print(" l %d %d %d %d %d" % (start1 + 1, start2 + 1, start1 + size, start2 + size, pctId), file=self.file) + print(f" l {start1 + 1} {start2 + 1} {start1 + size} {start2 + size} {pctId}", file=self.file) print("}", file=self.file) def write_lav_marker(self): diff --git a/lib/bx/align/score.py b/lib/bx/align/score.py index 2b62ca7..a7affea 100644 --- a/lib/bx/align/score.py +++ b/lib/bx/align/score.py @@ -114,7 +114,7 @@ def __str__(self): s = b else: s = f"{ord(b):02X}" - line.append("%*s" % (width, s)) + line.append(f"{s:>{width}}") lines.append(("".join(line)) + "\n") for a in self.alphabet1: line = [] @@ -129,7 +129,7 @@ def __str__(self): s = f"{score:8.6f}" else: s = f"{score}" - line.append("%*s" % (width, s)) + line.append(f"{s:>{width}}") lines.append(("".join(line)) + "\n") return "".join(lines) diff --git a/lib/bx/binned_array.py b/lib/bx/binned_array.py index 66d0f13..1aa4c80 100644 --- a/lib/bx/binned_array.py +++ b/lib/bx/binned_array.py @@ -183,7 +183,7 @@ def __init__(self, f, cache=32): M, V, max_size, bin_size, nbins = read_packed(f, ">5I") assert M == MAGIC # assert version less than max supported - assert V <= VERSION, "File is version %d but I don't know about anything beyond %d" % (V, VERSION) + assert V <= VERSION, f"File is version {V} but I don't know about anything beyond {VERSION}" self.max_size = max_size self.bin_size = bin_size self.nbins = nbins diff --git a/lib/bx/binned_array_tests.py b/lib/bx/binned_array_tests.py index 293196b..4b8ba76 100644 --- a/lib/bx/binned_array_tests.py +++ b/lib/bx/binned_array_tests.py @@ -47,19 +47,16 @@ def test_simple(source_target): source, target = source_target # Verify for i in range(len(source)): - assert source[i] == target[i], "No match, index: %d, source: %f, target: %f, len( source ): %d" % ( - i, - source[i], - target[i], - len(source), - ) + assert ( + source[i] == target[i] + ), f"No match, index: {i}, source: {source[i]:f}, target: {target[i]:f}, len( source ): {len(source)}" # Verify with slices for _ in range(10): a = int(rng.random() * len(source)) b = int(rng.random() * len(source)) if b < a: a, b = b, a - assert allclose(source[a:b], target[a:b]), "No match, index: %d:%d, source: %s, target: %s" % ( + assert allclose(source[a:b], target[a:b]), "No match, index: {}:{}, source: {}, target: {}".format( a, b, ",".join(map(str, source[a : a + 10])), @@ -73,7 +70,7 @@ def test_file(source_target): target.to_file(open("/tmp/foo", "wb")) target2 = FileBinnedArray(open("/tmp/foo", "rb")) for i in range(len(source)): - assert source[i] == target2[i], "No match, index: %d, source: %d, target: %d" % (i, source[i], target2[i]) + assert source[i] == target2[i], f"No match, index: {i}, source: {source[i]}, target: {target2[i]}" # Verify with slices target2 = FileBinnedArray(open("/tmp/foo", "rb")) for _ in range(10): @@ -81,7 +78,7 @@ def test_file(source_target): b = int(rng.random() * len(source)) if b < a: a, b = b, a - assert allclose(source[a:b], target[a:b]), "No match, index: %d:%d, source: %s, target: %s" % ( + assert allclose(source[a:b], target[a:b]), "No match, index: {}:{}, source: {}, target: {}".format( a, b, ",".join(map(str, source[a : a + 10])), @@ -96,7 +93,7 @@ def test_file_lzo(source_target): target3 = FileBinnedArray(open("/tmp/foo3", "rb")) # Verify for i in range(len(source)): - assert source[i] == target3[i], "No match, index: %d, source: %d, target: %d" % (i, source[i], target3[i]) + assert source[i] == target3[i], f"No match, index: {i}, source: {source[i]}, target: {target3[i]}" # Verify with slices target3 = FileBinnedArray(open("/tmp/foo3", "rb")) for _ in range(10): @@ -104,7 +101,7 @@ def test_file_lzo(source_target): b = int(rng.random() * len(source)) if b < a: a, b = b, a - assert allclose(source[a:b], target3[a:b]), "No match, index: %d:%d, source: %s, target: %s" % ( + assert allclose(source[a:b], target3[a:b]), "No match, index: {}:{}, source: {}, target: {}".format( a, b, ",".join(map(str, source[a : a + 10])), @@ -124,8 +121,4 @@ def test_binned_array_writer(source_target): # Verify target4 = FileBinnedArray(open("/tmp/foo4", "rb")) for i in range(len(source)): - assert allclose(source[i], target4[i]), "No match, index: %d, source: %d, target: %d" % ( - i, - source[i], - target4[i], - ) + assert allclose(source[i], target4[i]), f"No match, index: {i}, source: {source[i]}, target: {target4[i]}" diff --git a/lib/bx/cookbook/__init__.py b/lib/bx/cookbook/__init__.py index 2e3008b..f6ed35c 100644 --- a/lib/bx/cookbook/__init__.py +++ b/lib/bx/cookbook/__init__.py @@ -4,7 +4,7 @@ import types -seq_types = type(()), type([]) +seq_types = (tuple, list) def flatten(*args): diff --git a/lib/bx/cookbook/doc_optparse.py b/lib/bx/cookbook/doc_optparse.py index af8ecc8..e157ed7 100644 --- a/lib/bx/cookbook/doc_optparse.py +++ b/lib/bx/cookbook/doc_optparse.py @@ -44,7 +44,8 @@ def nonzero(self): # will become the nonzero method of optparse.Values return False -optparse.Values.__nonzero__ = nonzero # dynamically fix optparse.Values +# dynamically fix optparse.Values +optparse.Values.__nonzero__ = nonzero # type: ignore[attr-defined] class ParsingError(Exception): diff --git a/lib/bx/interval_index_file.py b/lib/bx/interval_index_file.py index f1d5293..bfdea46 100644 --- a/lib/bx/interval_index_file.py +++ b/lib/bx/interval_index_file.py @@ -99,12 +99,12 @@ try: from bx.misc import seekbzip2 except ImportError: - seekbzip2 = None + seekbzip2 = None # type: ignore[assignment] try: from bx.misc import seeklzop except ImportError: - seeklzop = None + seeklzop = None # type: ignore[assignment] __all__ = ["Indexes", "Index"] @@ -141,7 +141,7 @@ def offsets_for_max_size(max_size): if max_size < max: break else: - raise Exception("%d is larger than the maximum possible size (%d)" % (max_size, BIN_OFFSETS_MAX[0])) + raise Exception(f"{max_size} is larger than the maximum possible size ({BIN_OFFSETS_MAX[0]})") return BIN_OFFSETS[(len(BIN_OFFSETS) - i - 1) :] @@ -158,7 +158,7 @@ def bin_for_range(start, end, offsets=None): else: start_bin >>= BIN_NEXT_SHIFT end_bin >>= BIN_NEXT_SHIFT - raise Exception("Interval (%d,%d) out of range" % (start, end)) + raise Exception(f"Interval ({start},{end}) out of range") class AbstractMultiIndexedAccess: @@ -166,7 +166,7 @@ class AbstractMultiIndexedAccess: Allows accessing multiple indexes / files as if they were one """ - indexed_access_class = None + indexed_access_class: type["AbstractIndexedAccess"] def __init__(self, filenames, index_filenames=None, keep_open=False, use_cache=False, **kwargs): # TODO: Handle index_filenames argument @@ -319,9 +319,7 @@ def open(self, filename): raise Exception("File does not have expected header") if version > VERSION: warn( - "File claims version %d, I don't known anything about versions beyond %d. Attempting to continue", - version, - VERSION, + f"File claims version {version}, I don't known anything about versions beyond {VERSION}. Attempting to continue" ) self.version = version for _ in range(length): @@ -509,12 +507,12 @@ def write_packed_uints(f, v, num_bytes): v >>= 32 num_bytes -= 4 parts.reverse() # (write most-significant chunk first) - write_packed(f, ">%dI" % len(parts), *parts) + write_packed(f, f">{len(parts)}I", *parts) -def unpack_uints(parts): - chunks = len(parts) / 4 - vals = unpack(">%dI" % chunks, parts) +def unpack_uints(parts: bytes): + chunks = len(parts) // 4 + vals = unpack(f">{chunks}I", parts) val = vals[0] for v in vals[1:]: val = (val << 32) + v diff --git a/lib/bx/interval_index_file_tests.py b/lib/bx/interval_index_file_tests.py index 1316a7a..1df0eb8 100644 --- a/lib/bx/interval_index_file_tests.py +++ b/lib/bx/interval_index_file_tests.py @@ -20,7 +20,7 @@ def test_interval_index_file(): chrs = [] for i in range(5): intervals = [] - name = "seq%d" % i + name = f"seq{i}" max = random.randint(0, interval_index_file.MAX) # print name, "size", max for i in range(500): @@ -40,7 +40,7 @@ def test_interval_index_file(): ix = Indexes(fname) for i in range(5): intervals = chrs[i] - name = "seq%d" % i + name = f"seq{i}" for i in range(100): start = random.randint(0, max) end = random.randint(0, max) diff --git a/lib/bx/intervals/io.py b/lib/bx/intervals/io.py index 08eca9e..37851dc 100644 --- a/lib/bx/intervals/io.py +++ b/lib/bx/intervals/io.py @@ -47,18 +47,18 @@ def __init__(self, reader, fields, chrom_col, start_col, end_col, strand_col, de self.nfields = nfields = len(fields) # Parse chrom/source column if chrom_col >= nfields: - raise MissingFieldError("No field for chrom_col (%d)" % chrom_col) + raise MissingFieldError(f"No field for chrom_col ({chrom_col})") self.chrom = fields[chrom_col].strip() # Parse start column and ensure it is an integer if start_col >= nfields: - raise MissingFieldError("No field for start_col (%d)" % start_col) + raise MissingFieldError(f"No field for start_col ({start_col})") try: self.start = int(fields[start_col]) except ValueError as e: raise FieldFormatError("Could not parse start_col: " + str(e), expected="integer") # Parse end column and ensure it is an integer if end_col >= nfields: - raise MissingFieldError("No field for end_col (%d)" % end_col) + raise MissingFieldError(f"No field for end_col ({end_col})") try: self.end = int(fields[end_col]) except ValueError as e: diff --git a/lib/bx/intervals/operations/quicksect.py b/lib/bx/intervals/operations/quicksect.py index 65abfd6..46df168 100644 --- a/lib/bx/intervals/operations/quicksect.py +++ b/lib/bx/intervals/operations/quicksect.py @@ -5,12 +5,7 @@ import math import random - -try: - from time import process_time -except ImportError: - # For compatibility with Python < 3.3 - from time import clock as process_time +from time import process_time class IntervalTree: @@ -158,7 +153,7 @@ def main(): def test_func(node): - print("[%d, %d), %d" % (node.start, node.end, node.maxend)) + print(f"[{node.start}, {node.end}), {node.maxend}") def bad_sect(lst, int_start, int_end): diff --git a/lib/bx/intervals/random_intervals.py b/lib/bx/intervals/random_intervals.py index 30ac966..c2865cb 100644 --- a/lib/bx/intervals/random_intervals.py +++ b/lib/bx/intervals/random_intervals.py @@ -180,8 +180,7 @@ def throw_random_private(lengths, regions, save_interval_func, allow_overlap=Fal hi_rgn += 1 if candidates == 0: raise MaxtriesException( - "No region can fit an interval of length %d (we threw %d of %d)" - % (length, num_thrown, len(lengths)) + f"No region can fit an interval of length {length} (we threw {num_thrown} of {len(lengths)})" ) hi_rgn -= 1 # Select a candidate @@ -211,13 +210,9 @@ def throw_random_private(lengths, regions, save_interval_func, allow_overlap=Fal rgn_length, rgn_start, rgn_extra = regions.pop(lo) rgn_end = rgn_start + rgn_length assert s >= 0 - assert rgn_start + s + length <= rgn_end, "Expected: %d + %d + %d == %d <= %d" % ( - rgn_start, - s, - length, - rgn_start + s + length, - rgn_end, - ) + assert ( + rgn_start + s + length <= rgn_end + ), f"Expected: {rgn_start} + {s} + {length} == {rgn_start + s + length} <= {rgn_end}" regions.reverse() if s >= min_length: bisect.insort(regions, (s, rgn_start, rgn_extra)) diff --git a/lib/bx/misc/seekbzip2_tests.py b/lib/bx/misc/seekbzip2_tests.py index 8942b26..24e98b2 100644 --- a/lib/bx/misc/seekbzip2_tests.py +++ b/lib/bx/misc/seekbzip2_tests.py @@ -6,11 +6,12 @@ import os import random from codecs import encode +from typing import Optional from bx.misc import seekbzip2 -F = None -T = None +F: Optional[str] = None +T: Optional[str] = None # F="/Users/james/work/seek-bzip2/test_random.dat.bz2" # T="/Users/james/cache/hg18/align/multiz28way/chr10.maf.bz2" @@ -44,7 +45,7 @@ def test_random_seeking(): a = f.read(chunk) b = raw_data[seek_to : seek_to + chunk] - assert a == b, "'%s' != '%s' on %dth attempt" % (encode(a, "hex"), encode(b, "hex"), i) + assert a == b, "'{}' != '{}' on {}th attempt".format(encode(a, "hex"), encode(b, "hex"), i) assert f.tell() == min(seek_to + chunk, len(raw_data)) f.close() @@ -57,10 +58,10 @@ def test_text_reading(): f = seekbzip2.SeekableBzip2File(T, T + "t") pos = 0 for i, (line, raw_line) in enumerate(zip(f, raw_file)): - assert line == raw_line, "%d: %r != %r" % (i, line.rstrip(b"\n"), raw_line) + assert line == raw_line, "{}: {!r} != {!r}".format(i, line.rstrip(b"\n"), raw_line) pos += len(line) ftell = f.tell() - assert ftell == pos, "%d != %d" % (ftell, pos) + assert ftell == pos, f"{ftell} != {pos}" f.close() def test_text_reading_2(): @@ -76,6 +77,6 @@ def test_text_reading_2(): assert line.rstrip(b"\r\n") == raw_lines[i], "{!r} != {!r}".format(line.rstrip(b"\r\n"), raw_lines[i]) pos += len(line) ftell = f.tell() - assert ftell == pos, "%d != %d" % (ftell, pos) + assert ftell == pos, f"{ftell} != {pos}" i += 1 f.close() diff --git a/lib/bx/motif/io/transfac.py b/lib/bx/motif/io/transfac.py index f412cbe..3ac8541 100644 --- a/lib/bx/motif/io/transfac.py +++ b/lib/bx/motif/io/transfac.py @@ -220,7 +220,7 @@ def write(self, motif): print(prefix, " ", " ".join(s.rjust(6) for s in matrix.alphabet), file=output) for i in range(matrix.width): print( - "%02d" % (i + 1), + f"{i + 1:02d}", " ", " ".join( str(matrix.values[i, matrix.char_to_index[ord(s)]]).rjust(6) for s in matrix.alphabet diff --git a/lib/bx/motif/pwm.py b/lib/bx/motif/pwm.py index 32f433a..1e9f5be 100644 --- a/lib/bx/motif/pwm.py +++ b/lib/bx/motif/pwm.py @@ -16,7 +16,7 @@ zeros, ) -from . import _pwm +from . import _pwm # type: ignore[attr-defined] class BaseMatrix: diff --git a/lib/bx/pwm/position_weight_matrix.py b/lib/bx/pwm/position_weight_matrix.py index 6f54002..124c0db 100755 --- a/lib/bx/pwm/position_weight_matrix.py +++ b/lib/bx/pwm/position_weight_matrix.py @@ -32,8 +32,7 @@ def __init__(self, seqrows, headers=None): ncol = len(row) elif ncol != len(row): raise ValueError( - "Align: __init__:alignment block:row %d does not have %d columns, it has %d" - % (rownum, ncol, len(row)) + f"Align: __init__:alignment block:row {rownum} does not have {ncol} columns, it has {len(row)}" ) except Exception: print(row) @@ -565,10 +564,6 @@ def corrected_probability_score(self, freq, base, i): f = float(freq[i][base]) s = self.pseudocount(base) N = self.sites - # print >>sys.stderr, "f:%.3f + s:%.3f = %.3f" % (f,s,f + s) - # print >>sys.stderr, "-------------------------" - # print >>sys.stderr, "N:%d + %d = %d" % (N,self.pseudocount(), N + self.pseudocount()) - # print >>sys.stderr, "\t\t %.3f\n" % ((f + s) / (N + self.pseudocount())) assert (f + s) > 0 return (f + s) / (N + self.pseudocount()) @@ -577,18 +572,12 @@ def pwm_score(self, base, i, freq, background=None): if background is None: background = self.background p = self.score_correction(freq, base, i) - # print >>sys.stderr, p - # print >>sys.stderr, "k %d %c" % (i,base),freq[i][base] b = background[base] try: return math.log(p / b, 2) except OverflowError: - # print >>sys.stderr,"base=%c, math.log(%.3f / %.3f)" % (base,p,b) - # print >>sys.stderr,self.id return float("nan") except ValueError: - # print >>sys.stderr,"base=%c, math.log(%.3f / %.3f)" % (base,p,b) - # print >>sys.stderr,self.id return float("nan") def parse_weight(self, weightString): @@ -612,10 +601,9 @@ def __str__(self): headers = [f"{nt}" for nt in self.alphabet] lines.append("P0\t" + "\t".join(headers)) for ix in range(0, len(self.rows)): - weights = ["%d" % self.counts[ix][nt] for nt in self.alphabet] - # lines.append(("%02d\t" % ix) + "\t".join(weights) + "\t" + self.consensus[ix]) + weights = [f"{self.counts[ix][nt]}" for nt in self.alphabet] lines.append( - ("%02d\t" % ix) + f"{ix:02d}\t" + "\t".join(weights) + "\t" + str(sum(self.counts[ix].values())) @@ -677,9 +665,9 @@ def close(self): def where(self): if self.name is None: - return "line %d" % self.lineNumber + return f"line {self.lineNumber}" else: - return "line %d in %s" % (self.lineNumber, self.name) + return f"line {self.lineNumber} in {self.name}" def __iter__(self): if self.format == "basic": @@ -888,10 +876,9 @@ def consensus_symbol(pattern): def match_consensus(sequence, pattern): return c_match_consensus(sequence, pattern, len(sequence)) - # print >>sys.stderr, "C match_consensus used" except ImportError: - # print >>sys.stderr, "python match_consensus used" - def match_consensus(sequence, pattern, size): + + def match_consensus(sequence, pattern): for s, p in zip(sequence, pattern): if p == "N": continue diff --git a/lib/bx/seq/fasta.py b/lib/bx/seq/fasta.py index 83f71b0..0df3d00 100644 --- a/lib/bx/seq/fasta.py +++ b/lib/bx/seq/fasta.py @@ -43,7 +43,7 @@ def __init__(self, file, revcomp=False, name="", gap=None, lookahead=None, conti self.lookahead = lookahead if contig is None: contig = 1 - assert contig >= 1, "contig %d is not legal" % contig + assert contig >= 1, f"contig {contig} is not legal" # nota bene: certainly not the most efficient or elegant implementation @@ -73,7 +73,7 @@ def __init__(self, file, revcomp=False, name="", gap=None, lookahead=None, conti self.text = line # (allows headerless fasta) else: self.text.extend(line) - assert currContig == contig, "contig %d is not legal (file contains only %d)" % (contig, currContig) + assert currContig == contig, f"contig {contig} is not legal (file contains only {currContig})" if self.text is not None: self.text = "".join(self.text) self.length = len(self.text) diff --git a/lib/bx/seq/nib.py b/lib/bx/seq/nib.py index 4b1c16b..c83dfc5 100644 --- a/lib/bx/seq/nib.py +++ b/lib/bx/seq/nib.py @@ -27,7 +27,7 @@ SeqFile, SeqReader, ) -from . import _nib +from . import _nib # type: ignore[attr-defined] NIB_MAGIC_NUMBER = 0x6BE93D3A NIB_MAGIC_NUMBER_SWAP = 0x3A3DE96B diff --git a/lib/bx/seq/qdna.py b/lib/bx/seq/qdna.py index e7c5d69..6685d17 100644 --- a/lib/bx/seq/qdna.py +++ b/lib/bx/seq/qdna.py @@ -204,18 +204,18 @@ def read_codebook(self, codeF): fields = line.split(None) if len(fields) != 5: - raise ValueError("wrong vector size (line %d)" % lineNum) + raise ValueError(f"wrong vector size (line {lineNum})") try: codeNum = int(fields[0], 16) except ValueError: - raise ValueError("bad character code %s (line %d)" % (fields[0], lineNum)) + raise ValueError(f"bad character code {fields[0]} (line {lineNum})") if not 0 <= codeNum <= 255: - raise ValueError("character code %s is outside the valid range (line %d)" % (fields[0], lineNum)) + raise ValueError(f"character code {fields[0]} is outside the valid range (line {lineNum})") if chr(codeNum) in codeToProbs: - raise ValueError("character code %s appears more than once (line %d)" % (fields[0], lineNum)) + raise ValueError(f"character code {fields[0]} appears more than once (line {lineNum})") try: vec = {} @@ -225,7 +225,7 @@ def read_codebook(self, codeF): raise ValueError vec[alphabet[ix - 1]] = p except Exception: - raise ValueError("%s is a bad probability value (line %d)" % (fields[ix], lineNum)) + raise ValueError(f"{fields[ix]} is a bad probability value (line {lineNum})") codeToProbs[chr(codeNum)] = vec diff --git a/lib/bx/seq/seq.py b/lib/bx/seq/seq.py index 16cc047..ad02b9b 100644 --- a/lib/bx/seq/seq.py +++ b/lib/bx/seq/seq.py @@ -88,8 +88,8 @@ def get(self, start, length): AssertionError will be generated. """ # Check parameters - assert length >= 0, "Length must be non-negative (got %d)" % length - assert start >= 0, "Start must be greater than 0 (got %d)" % start + assert length >= 0, f"Length must be non-negative (got {length})" + assert start >= 0, f"Start must be greater than 0 (got {start})" assert ( start + length <= self.length ), f"Interval beyond end of sequence ({start}..{start + length} > {self.length})" diff --git a/lib/bx/seq/seq_tests.py b/lib/bx/seq/seq_tests.py index da5fcd8..2499cbd 100644 --- a/lib/bx/seq/seq_tests.py +++ b/lib/bx/seq/seq_tests.py @@ -48,7 +48,7 @@ def test_get_reader(self): assert ix < len(valid2_fa), "FastaReader returns too many sequences" text = f"{seq}" fields = text.split() - assert len(fields) == 2, 'SeqReader.__str__ returns incorrect sequence string "%s" (%d)' % text + assert len(fields) == 2, f'SeqReader.__str__ returns incorrect sequence string "{text}"' assert ( fields[0] == valid2_fa[ix][0] ), f"FastaReader returned the wrong name ({fields[0]},{valid2_fa[ix][0]})" diff --git a/lib/bx/seq/twobit.py b/lib/bx/seq/twobit.py index 5f69a6d..d1adedb 100644 --- a/lib/bx/seq/twobit.py +++ b/lib/bx/seq/twobit.py @@ -7,14 +7,9 @@ calcsize, unpack, ) -from typing import ( - BinaryIO, - Dict, - List, - Tuple, -) +from typing import BinaryIO -from . import _twobit +from . import _twobit # type: ignore[attr-defined] TWOBIT_MAGIC_NUMBER = 0x1A412743 TWOBIT_MAGIC_NUMBER_SWAP = 0x4327411A @@ -24,10 +19,10 @@ class TwoBitSequence: - masked_block_sizes: List - masked_block_starts: List - n_block_sizes: List - n_block_starts: List + masked_block_sizes: list + masked_block_starts: list + n_block_sizes: list + n_block_starts: list def __init__(self, tbf, header_offset=None): self.tbf = tbf @@ -78,13 +73,13 @@ def __init__(self, file: BinaryIO, do_mask: bool = True): # Read version self.version = self.read("L") if self.version != TWOBIT_VERSION: - raise Exception("File is version '%d' but I only know about '%d'" % (self.version, TWOBIT_VERSION)) + raise Exception(f"File is version '{self.version}' but I only know about '{TWOBIT_VERSION}'") # Number of sequences in file self.seq_count = self.read("L") # Header contains some reserved space self.reserved = self.read("L") # Read index of sequence names to offsets - index: Dict[str, TwoBitSequence] = {} + index: dict[str, TwoBitSequence] = {} for _ in range(self.seq_count): name = self.read_p_string() offset = self.read("L") @@ -119,7 +114,7 @@ def load_sequence(self, name: str) -> None: # Mark as loaded seq.loaded = True - def read_block_coords(self) -> Tuple[list, list]: + def read_block_coords(self) -> tuple[list, list]: block_count = self.read("L") if block_count == 0: return [], [] diff --git a/lib/bx/seq/twobit_tests.py b/lib/bx/seq/twobit_tests.py index 62d525c..186fcf1 100644 --- a/lib/bx/seq/twobit_tests.py +++ b/lib/bx/seq/twobit_tests.py @@ -43,10 +43,6 @@ def test_random_subseq_matches(filename): start = random.randint(0, length - 2) end = random.randint(start + 1, length) assert t[k].get(start, end) == s[start:end] - assert t[k][start:end] == s[start:end], "seq: %s, start: %d, end: %d\nExpected:\n%s\nActual:\n%s\n" % ( - k, - start, - end, - s[start:end], - t.get(k, start, end), - ) + assert ( + t[k][start:end] == s[start:end] + ), f"seq: {k}, start: {start}, end: {end}\nExpected:\n{s[start:end]}\nActual:\n{t.get(k, start, end)}\n" diff --git a/lib/bx_extras/fpconst.py b/lib/bx_extras/fpconst.py index 7b52187..68a1f78 100644 --- a/lib/bx_extras/fpconst.py +++ b/lib/bx_extras/fpconst.py @@ -35,8 +35,8 @@ # and define appropriate constants if _big_endian: - NaN = struct.unpack("d", b"\x7F\xF8\x00\x00\x00\x00\x00\x00")[0] - PosInf = struct.unpack("d", b"\x7F\xF0\x00\x00\x00\x00\x00\x00")[0] + NaN = struct.unpack("d", b"\x7f\xf8\x00\x00\x00\x00\x00\x00")[0] + PosInf = struct.unpack("d", b"\x7f\xf0\x00\x00\x00\x00\x00\x00")[0] NegInf = -PosInf else: NaN = struct.unpack("d", b"\x00\x00\x00\x00\x00\x00\xf8\xff")[0] diff --git a/lib/bx_extras/lrucache.py b/lib/bx_extras/lrucache.py index 3e60bd0..dc889c0 100644 --- a/lib/bx_extras/lrucache.py +++ b/lib/bx_extras/lrucache.py @@ -199,7 +199,7 @@ def __setattr__(self, name, value): del self.__dict[lru.key] def __repr__(self): - return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap)) + return f"<{str(self.__class__)} ({len(self.__heap)} elements)>" def mtime(self, key): """Return the last modification time for the cache record with key. diff --git a/lib/bx_extras/stats.py b/lib/bx_extras/stats.py index 571bada..f1a89e7 100644 --- a/lib/bx_extras/stats.py +++ b/lib/bx_extras/stats.py @@ -3990,10 +3990,6 @@ def abetai(a, b, x, verbose=1): # AANOVA CALCULATIONS - import LinearAlgebra - - LA = LinearAlgebra - def aglm(data, para): """ Calculates a linear model fit ... anova/ancova/lin-regress/t-test/etc. Taken diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..9bf679c --- /dev/null +++ b/mypy.ini @@ -0,0 +1,17 @@ +[mypy] +enable_error_code = ignore-without-code +show_error_codes = True +ignore_missing_imports = True +# check_untyped_defs = True +exclude = (?x)( + ^build/ + | ^dist/ + ) +pretty = True +no_implicit_reexport = True +no_implicit_optional = True +strict_equality = True +warn_redundant_casts = True +warn_unreachable = True +warn_unused_ignores = True +platform = linux diff --git a/pyproject.toml b/pyproject.toml index 09f70a0..7cdfb04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,16 +1,55 @@ [build-system] requires = [ "cython", - "numpy>=1.25.0; python_version>='3.9'", - "oldest-supported-numpy; python_version<'3.9'", + "numpy>=1.25.0", "setuptools", ] build-backend = "setuptools.build_meta" +[project] +name = "bx-python" +dynamic = ["version"] +authors = [ + { name = "James Taylor", email = "james@jamestaylor.org" }, + { name = "Bob Harris" }, + { name = "David King" }, + { name = "Brent Pedersen" }, + { name = "Kanwei Li" }, + { name = "Nicola Soranzo", email = "nicola.soranzo@earlham.ac.uk" }, +] +description = "Tools for manipulating biological data, particularly multiple sequence alignments" +readme = "README.md" +license = { text = "MIT" } +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Operating System :: POSIX", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Software Development :: Libraries :: Python Modules", +] +requires-python = ">=3.9" +dependencies = [ + "numpy", + "pyparsing", +] + +[project.urls] +Homepage = "https://github.com/bxlab/bx-python" +"Bug Tracker" = "https://github.com/bxlab/bx-python/issues" +"Source Code" = "https://github.com/bxlab/bx-python" + [tool.black] include = '\.pyi?$' line-length = 120 -target-version = ['py38'] +target-version = ['py39'] [tool.cibuildwheel] test-command = """ @@ -75,7 +114,7 @@ EOF isort = true [tool.ruff] -target-version = "py38" +target-version = "py39" [tool.ruff.lint] # Enable: pycodestyle errors (E), Pyflakes (F), flake8-bugbear (B), @@ -89,3 +128,15 @@ select = ["E", "F", "C4", "G", "ISC", "NPY", "UP"] # E501 is line length (delegated to black) # E741 Ambiguous variable name ignore = ["B9", "E501", "E741"] + +[tool.setuptools.dynamic] +version = {attr = "bx.__version__"} + +[tool.setuptools.exclude-package-data] +"*" = ["*.c", "*.h", "*.pxd", "*.pyx"] + +[tool.setuptools.package-data] +"*" = ["*.ps"] + +[tool.setuptools.packages.find] +where = ["lib"] diff --git a/scripts/axt_to_lav.py b/scripts/axt_to_lav.py index 1f26fa8..d9faea3 100755 --- a/scripts/axt_to_lav.py +++ b/scripts/axt_to_lav.py @@ -44,8 +44,6 @@ def usage(s=None): def main(): - global debug - primary = None secondary = None silent = False @@ -110,7 +108,7 @@ def main(): out.close() if not silent: - sys.stderr.write("%d blocks read, %d written\n" % (axtsRead, axtsWritten)) + sys.stderr.write(f"{axtsRead} blocks read, {axtsWritten} written\n") def parse_spec(spec): # returns (seq_file,species_name,lengths_file) @@ -137,16 +135,16 @@ def read_lengths(fileName): fields = line.split() if len(fields) != 2: - raise ValueError("bad lengths line (%s:%d): %s" % (fileName, lineNumber, line)) + raise ValueError(f"bad lengths line ({fileName}:{lineNumber}): {line}") chrom = fields[0] try: length = int(fields[1]) except ValueError: - raise ValueError("bad lengths line (%s:%d): %s" % (fileName, lineNumber, line)) + raise ValueError(f"bad lengths line ({fileName}:{lineNumber}): {line}") if chrom in chromToLength: - raise ValueError("%s appears more than once (%s:%d): %s" % (chrom, fileName, lineNumber, line)) + raise ValueError(f"{chrom} appears more than once ({fileName}:{lineNumber}): {line}") chromToLength[chrom] = length diff --git a/scripts/axt_to_maf.py b/scripts/axt_to_maf.py index 0521361..60968a7 100755 --- a/scripts/axt_to_maf.py +++ b/scripts/axt_to_maf.py @@ -36,8 +36,6 @@ def usage(s=None): def main(): - global debug - ########## # parse the command line ########## @@ -122,7 +120,7 @@ def main(): axtsWritten += 1 if not silent: - sys.stderr.write("%d blocks read, %d written\n" % (axtsRead, axtsWritten)) + sys.stderr.write(f"{axtsRead} blocks read, {axtsWritten} written\n") def clone_component(c): @@ -143,16 +141,16 @@ def read_lengths(fileName): fields = line.split() if len(fields) != 2: - raise ValueError("bad lengths line (%s:%d): %s" % (fileName, lineNumber, line)) + raise ValueError(f"bad lengths line ({fileName}:{lineNumber}): {line}") chrom = fields[0] try: length = int(fields[1]) except ValueError: - raise ValueError("bad lengths line (%s:%d): %s" % (fileName, lineNumber, line)) + raise ValueError(f"bad lengths line ({fileName}:{lineNumber}): {line}") if chrom in chromToLength: - raise ValueError("%s appears more than once (%s:%d): %s" % (chrom, fileName, lineNumber, line)) + raise ValueError(f"{chrom} appears more than once ({fileName}:{lineNumber}): {line}") chromToLength[chrom] = length diff --git a/scripts/bed_complement.py b/scripts/bed_complement.py index b9607ca..109d7d5 100755 --- a/scripts/bed_complement.py +++ b/scripts/bed_complement.py @@ -44,8 +44,8 @@ def read_len(f): end = bits.next_clear(start) if end > len: end = len - print("%s\t%d\t%d" % (chrom, start, end)) + print(f"{chrom}\t{start}\t{end}") if end == len: break else: - print("%s\t%d\t%d" % (chrom, 0, lens[chrom])) + print(f"{chrom}\t0\t{lens[chrom]}") diff --git a/scripts/bed_diff_basewise_summary.py b/scripts/bed_diff_basewise_summary.py index f8d91ec..8f22b02 100755 --- a/scripts/bed_diff_basewise_summary.py +++ b/scripts/bed_diff_basewise_summary.py @@ -39,6 +39,6 @@ def coverage(bitsets): both_covered = coverage(bitsets) -print("in both: \t%d" % both_covered) -print("only in %s:\t%d" % (in_fname, bits1_covered - both_covered)) -print("only in %s:\t%d" % (in2_fname, bits2_covered - both_covered)) +print(f"in both: \t{both_covered}") +print(f"only in {in_fname}:\t{bits1_covered - both_covered}") +print(f"only in {in2_fname}:\t{bits2_covered - both_covered}") diff --git a/scripts/bed_intersect_basewise.py b/scripts/bed_intersect_basewise.py index 7a78f0f..a412c1a 100755 --- a/scripts/bed_intersect_basewise.py +++ b/scripts/bed_intersect_basewise.py @@ -35,4 +35,4 @@ if start == bits.size: break end = bits.next_clear(start) - print("%s\t%d\t%d" % (chrom, start, end)) + print(f"{chrom}\t{start}\t{end}") diff --git a/scripts/bed_merge_overlapping.py b/scripts/bed_merge_overlapping.py index 37d7b3f..d520eac 100755 --- a/scripts/bed_merge_overlapping.py +++ b/scripts/bed_merge_overlapping.py @@ -29,4 +29,4 @@ if start == bits.size: break end = bits.next_clear(start) - print("%s\t%d\t%d" % (chrom, start, end)) + print(f"{chrom}\t{start}\t{end}") diff --git a/scripts/bed_rand_intersect.py b/scripts/bed_rand_intersect.py index cc16b52..ecabb88 100755 --- a/scripts/bed_rand_intersect.py +++ b/scripts/bed_rand_intersect.py @@ -161,8 +161,7 @@ def main(): for row in fraction_overlap: print("\t".join(map(str, row))) print( - "observed overlap: %d, sample mean: %d, sample stdev: %d" - % (total_actual, stats.amean(total_samples), stats.asamplestdev(total_samples)) + f"observed overlap: {total_actual}, sample mean: {stats.amean(total_samples)}, sample stdev: {stats.asamplestdev(total_samples)}" ) print("z-score:", (total_actual - stats.amean(total_samples)) / stats.asamplestdev(total_samples)) print("percentile:", sum(total_actual > total_samples) / nsamples) diff --git a/scripts/bed_subtract_basewise.py b/scripts/bed_subtract_basewise.py index 48bdd34..55cd3a8 100755 --- a/scripts/bed_subtract_basewise.py +++ b/scripts/bed_subtract_basewise.py @@ -18,7 +18,7 @@ def print_bits_as_bed(bits): if start == bits.size: break end = bits.next_clear(start) - print("%s\t%d\t%d" % (chrom, start, end)) + print(f"{chrom}\t{start}\t{end}") options, args = doc_optparse.parse(__doc__) diff --git a/scripts/bnMapper.py b/scripts/bnMapper.py index 689ecd7..a253c85 100755 --- a/scripts/bnMapper.py +++ b/scripts/bnMapper.py @@ -208,8 +208,8 @@ def transform_by_chrom(all_epo, from_elem_list, tree, chrom, opt, out_fd): start, end, len(to_elem_list), - ",".join("%d" % (e[2] - e[1]) for e in to_elem_list), - ",".join("%d" % (e[1] - start) for e in to_elem_list), + ",".join(f"{e[2] - e[1]}" for e in to_elem_list), + ",".join(f"{e[1] - start}" for e in to_elem_list), ) ) else: diff --git a/scripts/div_snp_table_chr.py b/scripts/div_snp_table_chr.py index 13a9756..819a600 100755 --- a/scripts/div_snp_table_chr.py +++ b/scripts/div_snp_table_chr.py @@ -127,10 +127,10 @@ def main(): ind_snp_count = snp[chr].count_range(start, end - start) print(chr, start, end, ind_div_count, ind_snp_count) - print("feature snp\t%d" % feature_snp_count) - print("feature div\t%d" % feature_div_count) - print("ar snp\t%d" % ar_snp_count) - print("ar div\t%d" % ar_div_count) + print(f"feature snp\t{feature_snp_count}") + print(f"feature div\t{feature_div_count}") + print(f"ar snp\t{ar_snp_count}") + print(f"ar div\t{ar_div_count}") # copies a dictionary of bitsets diff --git a/scripts/gene_fourfold_sites.py b/scripts/gene_fourfold_sites.py index f10dafc..68b096b 100755 --- a/scripts/gene_fourfold_sites.py +++ b/scripts/gene_fourfold_sites.py @@ -186,7 +186,7 @@ def main(): try: assert c3 < len(cds_seq) except AssertionError: - print("out of sequence at %d for %s, %d" % (c3, chrom, genome_seq_index[first_pos]), file=sys.stderr) + print(f"out of sequence at {c3} for {chrom}, {genome_seq_index[first_pos]}", file=sys.stderr) continue codon = cds_seq[c1], cds_seq[c2], cds_seq[c3] aa = translate(codon, GEN_CODE) diff --git a/scripts/lav_to_axt.py b/scripts/lav_to_axt.py index 64edd86..e42b070 100755 --- a/scripts/lav_to_axt.py +++ b/scripts/lav_to_axt.py @@ -50,7 +50,7 @@ def main(): axtsWritten += 1 if not silent: - sys.stderr.write("%d blocks read, %d written\n" % (lavsRead, axtsWritten)) + sys.stderr.write(f"{lavsRead} blocks read, {axtsWritten} written\n") if __name__ == "__main__": diff --git a/scripts/lav_to_maf.py b/scripts/lav_to_maf.py index 45ed490..82fae95 100755 --- a/scripts/lav_to_maf.py +++ b/scripts/lav_to_maf.py @@ -48,7 +48,7 @@ def main(): mafsWritten += 1 if not silent: - sys.stderr.write("%d blocks read, %d written\n" % (lavsRead, mafsWritten)) + sys.stderr.write(f"{lavsRead} blocks read, {mafsWritten} written\n") if __name__ == "__main__": diff --git a/scripts/mMK_bitset.py b/scripts/mMK_bitset.py index 671cc1b..0a4e61a 100644 --- a/scripts/mMK_bitset.py +++ b/scripts/mMK_bitset.py @@ -114,13 +114,11 @@ def main(): if options.outfile is not None: out_file.write( - "%s\t%d\t%d\t%d\t%d\t%d\t%d\t%1.15f\n" - % (chr, window, window + window_size, nonAR_snp, nonAR_div, AR_snp, AR_div, MK_pval) + f"{chr}\t{window}\t{window + window_size}\t{nonAR_snp}\t{nonAR_div}\t{AR_snp}\t{AR_div}\t{MK_pval:1.15f}\n" ) else: print( - "%s\t%d\t%d\t%d\t%d\t%d\t%d\t%1.15f" - % (chr, window, window + window_size, nonAR_snp, nonAR_div, AR_snp, AR_div, MK_pval) + f"{chr}\t{window}\t{window + window_size}\t{nonAR_snp}\t{nonAR_div}\t{AR_snp}\t{AR_div}\t{MK_pval:1.15f}" ) if options.outfile is not None: diff --git a/scripts/maf_chunk.py b/scripts/maf_chunk.py index a05d04e..5635d29 100755 --- a/scripts/maf_chunk.py +++ b/scripts/maf_chunk.py @@ -61,7 +61,7 @@ def __main__(): else: write_current_chunk = True if write_current_chunk: - maf_writer = bx.align.maf.Writer(open("%s/%09d.maf" % (out_dir, current_chunk), "w")) + maf_writer = bx.align.maf.Writer(open(f"{out_dir}/{current_chunk:09d}.maf", "w")) else: maf_writer = None count = 0 diff --git a/scripts/maf_covered_regions.py b/scripts/maf_covered_regions.py index ce23526..a544961 100755 --- a/scripts/maf_covered_regions.py +++ b/scripts/maf_covered_regions.py @@ -47,17 +47,7 @@ def main(): pid = block_pid(ref_comp, comp) if pid: out_files[comp_species].write( - "%s\t%d\t%d\t%s:%d-%d,%s\t%f\n" - % ( - ref_chrom, - ref_comp.forward_strand_start, - ref_comp.forward_strand_end, - comp_chrom, - comp.start, - comp.end, - comp.strand, - pid, - ) + f"{ref_chrom}\t{ref_comp.forward_strand_start}\t{ref_comp.forward_strand_end}\t{comp_chrom}:{comp.start}-{comp.end},{comp.strand}\t{pid:f}\n" ) for f in out_files.values(): diff --git a/scripts/maf_div_sites.py b/scripts/maf_div_sites.py index a659a4d..78b589c 100755 --- a/scripts/maf_div_sites.py +++ b/scripts/maf_div_sites.py @@ -48,7 +48,7 @@ def main(): if start == bits.size: break end = bits.next_clear(start) - print("%s\t%d\t%d" % (chrom, start, end)) + print(f"{chrom}\t{start}\t{end}") main() diff --git a/scripts/maf_extract_ranges_indexed.py b/scripts/maf_extract_ranges_indexed.py index bff7b86..ef4bba9 100755 --- a/scripts/maf_extract_ranges_indexed.py +++ b/scripts/maf_extract_ranges_indexed.py @@ -86,7 +86,7 @@ def main(): blocks = index.get(src, start, end) # Open file if needed if dir: - out = bx.align.maf.Writer(open(os.path.join(dir, "%s:%09d-%09d.maf" % (src, start, end)), "w")) + out = bx.align.maf.Writer(open(os.path.join(dir, f"{src}:{start:09d}-{end:09d}.maf"), "w")) # Write each intersecting block if chop: for block in blocks: diff --git a/scripts/maf_to_axt.py b/scripts/maf_to_axt.py index a48688f..2c37554 100755 --- a/scripts/maf_to_axt.py +++ b/scripts/maf_to_axt.py @@ -82,7 +82,7 @@ def main(): out.write(axtBlock) mafsWritten += 1 - sys.stderr.write("%d blocks read, %d written\n" % (axtsRead, mafsWritten)) + sys.stderr.write(f"{axtsRead} blocks read, {mafsWritten} written\n") def clone_component(c): diff --git a/scripts/maf_to_fasta.py b/scripts/maf_to_fasta.py index 01278e6..2af31e2 100755 --- a/scripts/maf_to_fasta.py +++ b/scripts/maf_to_fasta.py @@ -29,7 +29,7 @@ def __main__(): else: l = m.components for c in l: - print(">%s:%d-%d" % (c.src, c.start, c.end)) + print(f">{c.src}:{c.start}-{c.end}") print(c.text) diff --git a/scripts/maf_word_frequency.py b/scripts/maf_word_frequency.py index 170f95b..be53b90 100755 --- a/scripts/maf_word_frequency.py +++ b/scripts/maf_word_frequency.py @@ -37,7 +37,7 @@ def __main__(): items.reverse() for count, motif in items: - print("%d\t%0.10f\t%s" % (count, count / total, motif)) + print(f"{count}\t{count / total:0.10f}\t{motif}") if __name__ == "__main__": diff --git a/scripts/out_to_chain.py b/scripts/out_to_chain.py index 7174473..a2b3232 100755 --- a/scripts/out_to_chain.py +++ b/scripts/out_to_chain.py @@ -38,11 +38,11 @@ def convert_action(trg_comp, qr_comp, ts, qs, opt): if np.sum(S) == 0: log.info("insignificant genomic alignment block %s ...", ch.id) continue - new_id = "%si%d" % (ch.id, i) + new_id = f"{ch.id}i{i}" print(str(ch._replace(id=new_id)), file=opt.output) for s, t, q in zip(S, T, Q): - print("%d %d %d" % (s, t, q), file=opt.output) - print("%d\n" % S[-1], file=opt.output) + print(f"{s} {t} {q}", file=opt.output) + print(f"{S[-1]}\n", file=opt.output) except KeyError: log.warning("skipping chromosome/contig (%s, %s)", a.chrom, b.chrom) diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index ac747af..0000000 --- a/setup.cfg +++ /dev/null @@ -1,59 +0,0 @@ -[metadata] -author = James Taylor, Bob Harris, David King, Brent Pedersen, Kanwei Li, Nicola Soranzo, and others -author_email = james@jamestaylor.org -classifiers = - Development Status :: 5 - Production/Stable - Intended Audience :: Developers - Intended Audience :: Science/Research - License :: OSI Approved :: MIT License - Operating System :: POSIX - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: 3.11 - Programming Language :: Python :: 3.12 - Programming Language :: Python :: 3.13 - Topic :: Scientific/Engineering :: Bio-Informatics - Topic :: Software Development :: Libraries :: Python Modules -name = bx-python -description = Tools for manipulating biological data, particularly multiple sequence alignments -license = MIT -long_description = file: README.md -long_description_content_type = text/markdown -project_urls = - Bug Tracker = https://github.com/bxlab/bx-python/issues - Source Code = https://github.com/bxlab/bx-python -url = https://github.com/bxlab/bx-python -version = attr: bx.__version__ - -[options] -install_requires = - numpy - pyparsing -packages = find: -package_dir = - =lib -python_requires = >=3.8 -zip_safe = False - -[options.package_data] -* = *.ps - -[options.packages.find] -where=lib - -[aliases] -snapshot = egg_info -rb_DEV bdist_egg rotate -m.egg -k1 -build_docs = build_sphinx build_apidocs - -[flake8] -ignore = E203,E501,E701,E704,E741,W503 -exclude = .git,.tox,.venv,build,doc/source/conf.py -import-order-style = smarkets -application-import-names = bx,bx_extras - -[build_sphinx] -source-dir = doc/source -build-dir = doc/docbuild -all_files = 1 diff --git a/tox.ini b/tox.ini index 0ae8300..f4fc6e6 100644 --- a/tox.ini +++ b/tox.ini @@ -8,6 +8,7 @@ commands = lint: flake8 . lint: black --check --diff . lint: isort --check --diff . + lint: mypy lib/ deps = test: Cython test: numpy @@ -17,6 +18,7 @@ deps = lint: black lint: flake8 lint: isort + lint: mypy lint: ruff package = test: editable