-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathdemo.py
More file actions
140 lines (115 loc) · 4.54 KB
/
demo.py
File metadata and controls
140 lines (115 loc) · 4.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import argparse
from time import perf_counter
from time import time

import matplotlib.pyplot as plt
import numpy as np

import kmeans
def make_blobs(n_points):
    """Convenience function to create data in two clusters, one around
    (0.5, 0.5) and the other (-0.5, -0.5).

    Every point receives its own independent Gaussian noise draw (mean 0,
    standard deviation 0.25), so the two clusters are not translated copies
    of one another. When `n_points` is odd the first cluster gets the extra
    point, so exactly `n_points` rows are returned.

    Parameters
    ----------
    n_points : int, total number of data points in both clusters

    Returns
    -------
    ndarray, 2D
        Shape (n_points, 2); rows around (0.5, 0.5) come first, followed by
        the rows around (-0.5, -0.5).

    Raises
    ------
    ValueError
        If `n_points` is negative.
    """
    n_points = int(n_points)
    if n_points < 0:
        raise ValueError('n_points must be non-negative')
    centers = np.array([[0.5, 0.5], [-0.5, -0.5]])
    n_second = n_points // 2
    n_first = n_points - n_second
    # labels[i] is the index of the center that point i is scattered around.
    labels = np.repeat([0, 1], [n_first, n_second])
    noise = np.random.normal(0, .25, size=(n_points, 2))
    return centers[labels] + noise
def plot_setup(centroids, assignments, ax, title=None, legend=False):
    """Convenience function to plot clusters and centroids.

    Clusters are paired in order with the colors blue and red, so at most
    the first two clusters are drawn. Everything is drawn on `ax`,
    including the legend.

    Parameters
    ----------
    centroids : list-like 2D
    assignments : list of list-like
    ax : matplotlib axis object
    title : str, to put on ax
    legend : bool, plot legend
    """
    for centroid, assigns, color in zip(centroids, assignments, 'br'):
        # NOTE(review): this uses the number of clusters, so the value is the
        # same on every pass; the name suggests it may have been meant to
        # scale with len(assigns) -- confirm before changing.
        size_alpha = 1 / (np.log2(len(assignments)) + 2)
        # An empty cluster has no coordinates to unpack into scatter();
        # draw only its centroid.
        if len(assigns):
            ax.scatter(*zip(*assigns), c=color, alpha=size_alpha, s=30, label='blob')
        ax.scatter(*centroid, c='k', marker='*', alpha=0.7, s=300, label='center')
    if title:
        ax.set_title(title, fontsize=15)
    if legend:
        # Put the legend on the axis that was passed in rather than on
        # whichever axis pyplot currently treats as active.
        ax.legend(loc=2)
def plot_single(X, km, title, legend=False):
    """Run one k-means implementation on X and show the outcome in its
    own figure.

    Parameters
    ----------
    X : list-like 2D
    km : function, kmeans clustering function
    title : str, for plot
    legend : bool, plot legend
    """
    figure, axis = plt.subplots(figsize=(8, 8))
    # The clustering function hands back (centroids, assignments) for k=2.
    clustering = km(X, k=2)
    centers, members = clustering
    plot_setup(centers, members, axis, legend=legend)
    figure.suptitle(title, fontsize=20)
    plt.show()
def plot_cluster_comp(X):
    """Plots the results of both clustering implementations side-by-side
    with the timing for the implementations above the clustering results.

    Parameters
    ----------
    X : array-like 2D
        Converted to an ndarray up front. The base Python implementation is
        given the nested-list form and the NumPy implementation the array.
    """
    # Accept any 2D array-like; .tolist() and .shape below need an ndarray.
    X = np.asarray(X)
    fig, axs = plt.subplots(1, 2, figsize=(16, 8))
    params = zip(axs, (X.tolist(), X), ('Base Python', 'NumPy'),
                 (kmeans.base_python, kmeans.numpy))
    for ax, data, algo, km in params:
        # perf_counter is the clock meant for measuring short durations;
        # unlike time() it is unaffected by system clock updates.
        start_time = perf_counter()
        centroids, assignments = km(data, k=2)
        total_time = perf_counter() - start_time
        timed_title = '{}: {:.2f} seconds'.format(algo, total_time)
        plot_setup(centroids, assignments, ax, timed_title)
    fig.suptitle('Timing - {} Data Points'.format(X.shape[0]), fontsize=20)
    plt.show()
def plot_timing_comp(data_sizes=(10, 100, 1000, 2500, 5000)):
    """Plots the time taken to cluster on given data over some data sizes.

    For each size a fresh data set is generated and clustered once by the
    base Python implementation (given nested lists) and once by the NumPy
    implementation (given the array); only the clustering call is timed.

    Parameters
    ----------
    data_sizes : tuple, ints
        Any iterable of ints is accepted; it is materialized once because
        the sizes are needed both for the runs and for the x-axis.
    """
    data_sizes = tuple(data_sizes)
    fig, ax = plt.subplots(figsize=(8, 8))
    # times[0] collects base Python durations, times[1] the NumPy ones,
    # matching the order of `kms`.
    times = ([], [])
    kms = (kmeans.base_python, kmeans.numpy)
    for num_points in data_sizes:
        X = make_blobs(num_points)
        for data, km, km_time in zip((X.tolist(), X), kms, times):
            # perf_counter is the clock meant for measuring short durations;
            # unlike time() it is unaffected by system clock updates.
            start_time = perf_counter()
            km(data, k=2)
            km_time.append(perf_counter() - start_time)
    ax.plot(data_sizes, times[0], c='r', label='Base')
    ax.plot(data_sizes, times[1], c='b', label='NumPy')
    ax.set_xlabel('Number of Points to Cluster', fontsize=16)
    ax.set_ylabel('Time to Cluster - Seconds', fontsize=16)
    ax.legend(loc='best')
    fig.suptitle('Time Scaling for 1000 Iterations', fontsize=20)
    plt.show()
if __name__ == '__main__':
    # Command-line entry point: choose which demo plots to show.
    cli = argparse.ArgumentParser(description='Run demo of numeric Python with k-means.')
    cli.add_argument('--count', type=int, default=100,
                     help='number of data points to generate in total')
    for flag, help_text in (
            ('--base', 'run with base python implementation'),
            ('--numpy', 'run with numpy implementation'),
            ('--comp', 'run to create time comparison plot'),
            ('--ex', 'run to create example plot')):
        cli.add_argument(flag, action='store_true', help=help_text)
    args = cli.parse_args()

    # The timing plot generates its own data sets, so it runs before X is made.
    if args.comp:
        plot_timing_comp()

    X = make_blobs(args.count)

    # (flag value, clustering function, figure title, show legend)
    single_runs = (
        (args.ex, kmeans.numpy, 'Example k-means', True),
        (args.base, kmeans.base_python, 'Base Python k-means', False),
        (args.numpy, kmeans.numpy, 'NumPy k-means', False),
    )
    for requested, km, title, show_legend in single_runs:
        if requested:
            plot_single(X, km, title, legend=show_legend)

    # With no flags given, fall back to the side-by-side comparison.
    if not (args.comp or args.ex or args.base or args.numpy):
        plot_cluster_comp(X)