-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkmer.java
More file actions
94 lines (88 loc) · 3.21 KB
/
kmer.java
File metadata and controls
94 lines (88 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
// example usage
// java KMers seq/sequences.fa 1 1000
/**
*
* @author rgoldst
*/
public class KMers {
HashMap<String, String> sequenceData = new HashMap<>();
String refSequence = "";
int kLength = 15;
double nSeq = 0.;
int initialStart = 0;
int endPos = 1000;
/**
* @param args the command line arguments
*/
public static void main(String[] args) {
KMers kmers = new KMers();
kmers.readFiles(args);
kmers.run();
}
KMers() {
}
void readFiles(String[] args){
if (args.length > 1) {
initialStart = Integer.parseInt(args[1]);
endPos = Integer.parseInt(args[2]);
}
boolean firstSeq = true;
try {
FileReader file = new FileReader(args[0]);
BufferedReader buff = new BufferedReader(file);
boolean eof = false;
while (!eof) {
String line = buff.readLine();
if (line == null) {
eof = true;
} else if (firstSeq) {
refSequence = buff.readLine();
firstSeq = false;
} else {
String name = line.replaceFirst(">", "");
String seq = buff.readLine();
sequenceData.put(name, seq);
System.out.println(name);
}
}
} catch (IOException ioe) {
System.out.println("Error -- " + ioe.toString());
System.exit(1);
}
nSeq = 1. * sequenceData.size();
}
void run() { //the meat
int nStarts = refSequence.length() - kLength; //length of kmer, 15
// if this run will encouter the last base of the sequence then
if(nStarts < endPos) {
endPos = nStarts;
}
for (int iStart = initialStart; iStart < endPos; iStart++) { //take kmer from 1-15, 2-16
String k = refSequence.substring(iStart, iStart + kLength);
double nHit = 0.;
for (String otherSeq : sequenceData.values()) { //look at every other sequence, is that kmer contained in the reference sequence at all?
if (otherSeq.contains(k)) { // if contains string
nHit++;
} else {
boolean intOneMiss = false; // if doesnt contain that string //genius code
for (int iSite = 0; iSite < kLength; iSite++) {
String oneAway = ".*" + k.substring(0,iSite) + "."; // regex grep type thing
if (iSite < kLength-1) {
oneAway = oneAway + k.substring(iSite+1, kLength);
}
oneAway = oneAway + ".*";
intOneMiss = intOneMiss || otherSeq.matches(oneAway);
}
if (intOneMiss) {
nHit++;
}
}
}
System.out.println(iStart + "\t" + (nHit/nSeq) + "\t" + k); // what fraction of alt sequences contain that.
}
}
}