-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdesmatrix_remove_wt.py
More file actions
executable file
·110 lines (93 loc) · 3.73 KB
/
desmatrix_remove_wt.py
File metadata and controls
executable file
·110 lines (93 loc) · 3.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/python
##Usage: desmatrix_remove_wt.py wtseq.txt matrix.txt des_matrix.csv
##wtseq.txt is a file with the native sequence as a single line of one-letter AA codes.
##matrix.txt is the design matrix file generated by ConvertSeqTransfac.py
##The script re-generates the matrix.txt file, discarding residues that all have the same identity, and that identity matches the WT.
##The file is written out in CSV format to des_matrix.csv
import os, sys, csv
wtseqname = sys.argv[1]
wtseqfile = open(wtseqname, 'r')
wtseq = wtseqfile.readlines()
inmatrixname = sys.argv[2]
matrixfile = open(inmatrixname, 'r')
matrix = matrixfile.readlines()
outname = sys.argv[3]
outfile = open(outname, 'a')
#Read in the native sequence to a string, wt.
wt = wtseq[0]
#The second line should be read in as the header row.
headers = matrix[1].split()
#Starting at the third line, read each line in, convert to a list, and place in a list of lists. The first column is the position, and the next 20 correspond to each AA.
pos = []
linenum = 0
for line in matrix:
if ( linenum > 1 ):
line = line.split()
pos.append(line)
linenum = linenum + 1
#Each item in pos is an amino acid position.
#Create an empty list for output.
outputlist = []
#Loop over each item in pos.
for posidx,aalist in enumerate(pos):
#Assume that the position is designed.
designedpos = True
#Check if any of the list items are '1.0000'
for seqidx,seqpct in enumerate(aalist):
if ( seqpct == '1.0000' ):
#If so, check if it matches the WT sequence
if ( wt[posidx] == headers[seqidx] ):
#If so, we have 100% WT. Indicate that this position has not been designed, and skip to the next position.
designedpos = False
break
#We have checked each AA in the list. If we did not encounter a 1.000 at the WT position, write the line the an output list.
if ( designedpos ):
aalist.append(wt[posidx]) #Add the WT AA identity to the end of the line before writing out.
outputlist.append(aalist)
headers.append('wtid')
outfile.write(','.join(map(str,headers)))
outfile.write('\n')
for position in outputlist:
outfile.write(','.join(map(str,position)))
outfile.write('\n')
'''
#Go through the header row and determine what column total_score and description are in.
colnum = 0
for col in csvscores[1]:
if (col == 'total_score'):
totalcol = colnum
if (col == 'description'):
namecol = colnum
colnum = colnum + 1
bestscore = 999999999999.9
bestpose = ''
#For each file in the list of PDBs
for posefile in pdblist:
#Set that we haven't found this pose yet
foundpose = False
#Go through each row, keeping track of which number we're on
rownum = 0
for row in csvscores:
#Make sure we are not in a header row
if ((len(csvscores[rownum]) <= namecol) or (len(csvscores[rownum]) <= totalcol)):
rownum = rownum + 1
continue
#See if that row contains the score for the current PDB file in the list
if (csvscores[rownum][namecol] == posefile.rstrip()):
#If so, indicate that we've found the pose
foundpose = True
#If so, check if that pose has a better score than the current pose
if (float(csvscores[rownum][totalcol]) < float(bestscore)):
#If it has a better score, replace bestscore with total_score, and replace bestpose with description.
bestscore = csvscores[rownum][totalcol]
bestpose = csvscores[rownum][namecol]
#And since we've found our target, break out of the for loop
break
rownum = rownum + 1
if (foundpose == False):
print "WARNING: The output PDB " + posefile + " given in " + pdbfilelist + " was not found in " + myscores
print "From the list of output PDBs given in " + myscores + ", the best scoring pose is " + bestpose
print "It has a total_score of " + str(bestscore)
outfile = open('bestscores.txt', 'a')
outfile.write(bestpose + '.pdb\n')
'''