-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDistance.java
More file actions
99 lines (68 loc) · 1.97 KB
/
Distance.java
File metadata and controls
99 lines (68 loc) · 1.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/**
* Implements the k-tuple distance of Yang et al.: Yang et al. (2008)
* Performance comparison between k-tuple distance and four
* model-based distances in phylogenetic tree
* reconstruction. Nucleic Acids Res. 36(5) :e33,
* doi:10.1093/nar/gkn075.
*
* @author Marcel Turcotte (turcotte@eecs.uottawa.ca)
*/
public class Distance {
// Helper method. Uses a queue to generate all possible keys of
// size k.
private static void init(FrequencyTable t, int k) {
String[] alphabet = {"A", "C", "G", "T"};
LinkedList<String> keys = new LinkedList<String>();
for (String a : alphabet) {
keys.addLast(a);
}
boolean done = false;
while (! done) {
String key = keys.get(0);
if (key.length() == k) {
done = true;
} else {
keys.removeFirst();
for (String a : alphabet) {
keys.addLast(a+key);
}
}
}
for (int i=0; i<keys.size(); i++) {
t.init(keys.get(i));
}
}
// Helper method. Extracts the k-tuples from s and updates the
// frequency table.
private static void update(FrequencyTable t, int k, String s) {
for (int i=0; i < s.length() - k + 1; i++) {
t.update(s.substring(i, i+k));
}
}
/** Returns the k-tuples distance of the two input strings.
*
* @param k the size of the k-tuples (k-grams)
* @param a input string
* @param b input string
* @return the k-tuples distance
*/
public static double compare(int k, String a, String b) {
FrequencyTable fa = Utils.getFrequencyTable();
FrequencyTable fb = Utils.getFrequencyTable();
init(fa, k);
init(fb, k);
update(fa, k, a);
update(fb, k, b);
long[] as = fa.values();
long[] bs = fb.values();
assert as.length == bs.length;
double distance = 0.0;
for (int i=0; i<as.length; i++) {
double x, y;
x = as[i] / (double) (a.length() - k + 1);
y = bs[i] / (double) (b.length() - k + 1);
distance += Math.pow(x - y, 2.0);
}
return distance;
}
}