-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathcorrect_one-code_with-name.py
More file actions
74 lines (64 loc) · 2.22 KB
/
correct_one-code_with-name.py
File metadata and controls
74 lines (64 loc) · 2.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python3
import argparse
from typing import NamedTuple
# Command-line interface. Both paths are mandatory: the script opens
# options.i for reading and options.o for writing unconditionally, so a
# missing flag would otherwise surface later as a TypeError from open(None).
parser = argparse.ArgumentParser(
    description="Parse one-code output and eliminate lines that are contained in previously identified loci"
)
parser.add_argument("-i", required=True, help="One code output file")
parser.add_argument("-o", required=True, help="Output file name")
# Parsed at module level; the rest of the script reads options.i / options.o.
options = parser.parse_args()
class Element:
    """One tab-separated record plus the raw lines that accompany it.

    The record is stripped of surrounding whitespace and split on tabs;
    field 0 is stored as ``parts``, field 4 as ``chrom``, fields 5 and 6 as
    the integer ``start`` / ``stop`` and field 9 as ``fam``.  The untouched
    input line is kept in ``line`` and the caller-supplied list of
    accompanying lines in ``support``.

    Raises:
        IndexError: if the record has fewer than ten tab-separated fields.
        ValueError: if field 5 or field 6 is not an integer.
    """

    def __init__(self, line, support):
        fields = line.strip().split("\t")
        # Keep the raw text (including its line terminator) for re-emission.
        self.line = line
        self.chrom, self.parts = fields[4], fields[0]
        self.start, self.stop = int(fields[5]), int(fields[6])
        self.fam = fields[9]
        self.support = support

    def containedIn(self, previous):
        """Return True when this interval lies inside ``previous``.

        Both elements must share the same ``chrom``; the bounds are
        inclusive, so identical intervals count as contained.
        """
        if self.chrom != previous.chrom:
            return False
        return previous.start <= self.start and self.stop <= previous.stop
# Kept elements, grouped by their chrom attribute (dict preserves first-seen order).
chroms = {}
outlines = []  # NOTE(review): never read in the visible code
header = ""  # stays empty when no input line starts with "Score"

# Parse data
# NOTE(review): the original indentation was lost; this nesting assumes an
# Element is built for every "###" line, not only for multi-part ones - confirm.
with open(options.i) as fi:
    for record in fi:
        if record.startswith("Score"):
            # Header line: remembered for the output (the last one seen wins).
            header = record
            continue
        if not record.startswith("###"):
            # Every other line is only ever consumed as support, below.
            continue

        # The first tab-separated field lists its parts separated by "/".
        part_count = record.partition("\t")[0].count("/") + 1

        # For a multi-part record the next part_count lines are pulled from
        # the same file handle, so the outer loop never sees them.
        # readline() yields "" once the file is exhausted.
        if part_count > 1:
            support = [fi.readline() for _ in range(part_count)]
        else:
            support = []

        candidate = Element(record, support)
        kept = chroms.get(candidate.chrom)
        if kept is None:
            chroms[candidate.chrom] = [candidate]
        elif not candidate.containedIn(kept[-1]):
            # Only the most recently kept element is compared - presumably
            # the input is position-sorted per chromosome; verify upstream.
            kept.append(candidate)
# Produce output
# NOTE(review): the original indentation was lost; the trailing blank line is
# written once per element here (after its support lines) - confirm.
with open(options.o, "w") as fo:
    fo.write(header)
    for elems in chroms.values():
        # write all elements to output
        for elem in elems:
            # Get internal part to name the LTR element: drop a trailing
            # "_LTR" or "_I" from the family stored in field 9.
            for suffix in ("_LTR", "_I"):
                if elem.fam.endswith(suffix):
                    # Split the line terminator off first so it survives
                    # even when field 9 is the last column of the record;
                    # overwriting the raw field would otherwise drop it.
                    body = elem.line.rstrip("\r\n")
                    eol = elem.line[len(body):]
                    fields = body.split("\t")
                    fields[9] = elem.fam[: -len(suffix)]
                    elem.line = "\t".join(fields) + eol
                    break
            fo.write(elem.line)
            # Write all supporting lines (they carry their own newlines).
            fo.writelines(elem.support)
            fo.write("\n")