-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_graphs.py
More file actions
executable file
·258 lines (187 loc) · 7.28 KB
/
plot_graphs.py
File metadata and controls
executable file
·258 lines (187 loc) · 7.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates
import matplotlib.axes as axes
import matplotlib.ticker as ticker
import matplotlib
import seaborn as sns
from datetime import datetime
DEADLINE='2019-07-31-235959'
PLOT_DEADLINE=False
WORDS_COLOR='purple'
PAGES_COLOR='blue'
MPL_BACKEND='agg'
COLOR_SET= ['red', 'goldenrod', 'gold', '#FFFF79', 'aquamarine', 'green']
def get_times_as_date_time(times):
# Convert times to DateTime
timesdf = []
for time in times:
timesdf.append(datetime.strptime(time, '%Y-%m-%d-%H%M%S'))
return timesdf
def plot_simple_line(input_file, output_file):
# Import the data
df = pd.read_csv(input_file)
# Rename columns and get as Series
df.columns = ['Time', 'Pages', 'Words']
times = df['Time']
pages = df['Pages']
words = df['Words']
# Convert times to DateTime
timesdf = get_times_as_date_time(times)
# Get references to the figure and axes
fig = plt.figure()
ax = plt.gca()
ax2 = ax.twinx()
# Make a large graph
fig.set_size_inches(20, 10)
# Plot the times and counts
ax.plot(timesdf, pages, color=PAGES_COLOR)
ax.set_ylabel('pages', color=PAGES_COLOR)
ax.tick_params('y', colors=PAGES_COLOR)
ax.set_xlabel('date')
# Y axis between 0 and 300 (limit for PhD thesis)
ax.set_ylim([0,350])
# Plot ave length
ax.axhline(y=140, linestyle=':', color='green')
ax.axhline(y=300, linestyle='--', color='red')
ax2.plot(timesdf, words, color=WORDS_COLOR)
ax2.set_ylabel('words', color=WORDS_COLOR)
ax2.tick_params('y', colors=WORDS_COLOR)
# Second Y axis between 0 and 80,000 (limit for PhD thesis)
ax2.set_ylim([0,80000])
# Rotate x labels
fig.autofmt_xdate()
# Format the DateTime labels
myFmt = mdates.DateFormatter('%Y-%m-%d')
ax.xaxis.set_major_formatter(myFmt)
ax.xaxis_date()
if PLOT_DEADLINE:
ax.axvline(x=datetime.strptime(DEADLINE, '%Y-%m-%d-%H%M%S'), linestyle='--', color='red')
# Save the plot out
plt.savefig(output_file)
# Comment out for command line
# plt.show()
# Close the plot
plt.close()
def plot_stacked_regions(input_file, output_image):
# Import the data
df = pd.read_csv(input_file)
# Get series
times = df['TIME']
categories = [ df['TODO'], df['STARTED'], df['FIRST_DRAFT'], df['SECOND_DRAFT'], df['REVISIONS_DONE'], df['COMPLETE'] ]
timesdf = get_times_as_date_time(times)
# Normalize each observation to total to 100%
categories = normalize_categories(categories)
# Make a large graph
fig = plt.figure()
fig.set_size_inches(20, 10)
# Plot graph
plt.stackplot(timesdf, categories, labels=['TODO', 'STARTED', 'FIRST_DRAFT', 'SECOND_DRAFT', 'REVISIONS_DONE', 'COMPLETE'],
colors=COLOR_SET)
# Rotate x labels
fig.autofmt_xdate()
# Format the DateTime labels
ax = plt.gca()
myFmt = mdates.DateFormatter('%Y-%m-%d')
ax.xaxis.set_major_formatter(myFmt)
ax.xaxis_date()
ax.set_xlabel('date')
# Set Y axis as percentages
ax.set_ylabel('% of categories')
ax.yaxis.set_major_formatter(ticker.PercentFormatter())
ax.legend(loc=3)
if PLOT_DEADLINE:
ax.axvline(x=datetime.strptime(DEADLINE, '%Y-%m-%d-%H%M%S'), linestyle='--', color='red')
# Save the plot out
plt.savefig(output_image)
# Show the plot (comment out for command line)
# plt.show()
plt.close()
def plot_combined(states_file, counts_file, output_image):
# Import the data
df_count = pd.read_csv(counts_file)
df_states = pd.read_csv(states_file)
# Rename columns and get as Series
df_count.columns = ['Time', 'Pages', 'Words']
times_count = df_count['Time']
pages = df_count['Pages']
times_states = df_states['TIME']
categories = [ df_states['TODO'], df_states['STARTED'], df_states['FIRST_DRAFT'],
df_states['SECOND_DRAFT'], df_states['REVISIONS_DONE'], df_states['COMPLETE'] ]
# Convert times to DateTime
timesdf_count = get_times_as_date_time(times_count)
timesdf_states = get_times_as_date_time(times_states)
# Normalize each observation to total to 100%
categories = normalize_categories(categories)
# Get references to the figure and axes
fig = plt.figure()
ax = plt.gca()
ax2 = ax.twinx()
# Make a large graph
fig.set_size_inches(20, 10)
# Plot graph
ax.stackplot(timesdf_states, categories, labels=['TODO', 'STARTED', 'FIRST_DRAFT', 'SECOND_DRAFT', 'REVISIONS_DONE', 'COMPLETE'],
colors=COLOR_SET)
ax.set_ylabel('% of categories')
ax.yaxis.set_major_formatter(ticker.PercentFormatter())
# Plot the times and counts
ax2.plot(timesdf_count, pages, color=PAGES_COLOR)
ax2.set_ylabel('pages', color=PAGES_COLOR)
ax2.tick_params('y', colors=PAGES_COLOR)
ax2.set_xlabel('date')
# Y axis between 0 and 300 (limit for PhD thesis)
ax2.set_ylim([0,250])
ax.legend(loc=1, facecolor='white')
if PLOT_DEADLINE:
ax.axvline(x=datetime.strptime(DEADLINE, '%Y-%m-%d-%H%M%S'), linestyle='--', color='red')
# Rotate x labels
fig.autofmt_xdate()
# Format the DateTime labels
myFmt = mdates.DateFormatter('%Y-%m-%d')
ax.xaxis.set_major_formatter(myFmt)
ax.xaxis_date()
plt.tight_layout()
# Save the plot out
plt.savefig(output_image)
plt.close()
def normalize_categories(categories):
# Count categories
number_of_observations = len(categories[0])
# for each observation
# Convert categories to floats
for index in range(0, len(categories)):
categories[index] = categories[index].apply(float)
totals_by_index = {}
for index in range(0, number_of_observations):
total = 0
# for each category add the relevant observation to total
for category in categories:
total += category[index]
totals_by_index[index] = total
for cat_index, category in enumerate(categories):
values = []
for index, value in category.items():
values.append(value * 100.0 / totals_by_index[index])
categories[cat_index] = values
return categories
def plot_simple_line_with_default_args():
# Set seaborn style
sns.set()
matplotlib.use(MPL_BACKEND, warn=True)
print('Matplotlib Backend ' + matplotlib.get_backend())
plot_simple_line('page_count.csv','count.png')
def plot_combined_with_default_args():
matplotlib.use(MPL_BACKEND, warn=True)
print('Matplotlib Backend ' + matplotlib.get_backend())
plot_combined('state.csv', 'page_count.csv', 'combined.png')
def plot_stacked_regions_with_default_args():
# Set seaborn style
sns.set()
matplotlib.use(MPL_BACKEND, warn=True)
print('Matplotlib Backend ' + matplotlib.get_backend())
plot_stacked_regions('state.csv','state.png')
if __name__ == '__main__':
# once both are done, render both to a single plot too
plot_combined_with_default_args()
plot_simple_line_with_default_args()
plot_stacked_regions_with_default_args()