forked from karoka/Kennard-Stone-Algorithm
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkenStone.py
More file actions
69 lines (61 loc) · 1.9 KB
/
kenStone.py
File metadata and controls
69 lines (61 loc) · 1.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import numpy as np
from dist import *
def loadKS(input):
try:
X = np.loadtxt(input, delimiter="\t")
except:
import pandas as pd
X = pd.read_table(input, delim_whitespace=True, header=None)
return X.values
return X
def kenStone(X, k, precomputed=False, verbose=False):
n = len(X) # number of samples
if verbose:
print("Input Size:", n, "Desired Size:", k)
assert n >= 2 and n >= k and k >= 2, "Error: number of rows must >= 2, k must >= 2 and k must > number of rows"
# pair-wise distance matrix
dist = skdist(X, precomputed)
# get the first two samples
i0, i1 = np.unravel_index(np.argmax(dist, axis=None), dist.shape)
selected = set([i0, i1])
k -= 2
# iterate find the rest
minj = i0
while k > 0 and len(selected) < n:
mindist = 0.0
for j in range(n):
if j not in selected:
mindistj = min([dist[j][i] for i in selected])
if mindistj > mindist:
minj = j
mindist = mindistj
if verbose:
print(selected, minj, [dist[minj][i] for i in selected])
selected.add(minj)
k -= 1
if verbose:
print("selected samples indices: ", selected)
# return selected samples
if precomputed:
return list(selected)
else:
return X[list(selected), :]
def writeKS(output, X, precomputed=False):
if precomputed:
np.savetxt(output, X, fmt='%d')
else:
np.savetxt(output, X, fmt='%.5f')
def test():
# take features
input = 'test/distArray.txt'
X = loadKS(input)
Y = kenStone(X, 10)
writeKS('test/KSfeatures.txt', Y)
# precomputed
input = 'test/matrix.txt'
X = loadKS(input)
Y = kenStone(X, 10, precomputed=True)
writeKS('test/KSelected.txt', Y, precomputed=True)
if __name__ == "__main__":
test()
print("File saved")