-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgitlapse.py
More file actions
375 lines (267 loc) · 13.3 KB
/
gitlapse.py
File metadata and controls
375 lines (267 loc) · 13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
import os
import inspect
from subprocess import *
import tempfile
import sys
from optparse import OptionParser
from xml.dom.minidom import parseString
import re
from decimal import *
class Executor:
    # Thin wrapper around subprocess for running shell commands.
    # Python 2 syntax (print statement, `except E, e`) — this file targets Python 2.
    def execute(self, command):
        # Run `command` through the shell and return its stdout file object.
        # On a signal-terminated child or an OS-level launch failure the whole
        # process exits — callers never see a partial result.
        try:
            print "Running " + command
            # shell=True is required: callers pass strings containing shell
            # redirection (e.g. '> file' in GitRepo.list_commits_to_file).
            p = Popen(command, shell=True, stdout=PIPE)
            # Wait for the child explicitly; index [1] is the exit status word.
            retcode = os.waitpid(p.pid, 0)[1]
            if retcode < 0:
                # Negative status: the child was killed by a signal.
                print >>sys.stderr, "Child was terminated by signal", -retcode
                sys.exit(retcode)
            else:
                # NOTE(review): a non-zero positive exit status is NOT treated
                # as failure here — only signal deaths abort. Confirm intended.
                return p.stdout
        except OSError, e:
            # Raised when the shell/program could not be started at all.
            print >>sys.stderr, "Execution failed:", e
            sys.exit(2)
class GitRepo:
    """Shells out to git (via an Executor) for history listing and checkouts."""
    def __init__(self, git_dir, working_dir, executor):
        self.git_dir = git_dir
        self.working_dir = working_dir
        self.executor = executor
    def current_head(self):
        """Return the full hash of the commit currently at HEAD."""
        command = 'git --git-dir={0} log --format=format:"%H" -1'.format(self.git_dir)
        return self.executor.execute(command).read()
    def list_commits_to_file(self, destination_file_name):
        """Dump '<hash> || <iso date> || <subject>' lines to a file and reopen it."""
        command = ('git --git-dir={0} --no-pager log --format=format:"%H || %ai || %s%n"'
                   ' --date=iso > {1}').format(self.git_dir, destination_file_name)
        self.executor.execute(command)
        return open(destination_file_name)
    def commits(self, destination_file_name):
        """Return a list of (commit_hash, date) tuples for the whole history."""
        log_lines = self.list_commits_to_file(destination_file_name)
        # Lines without the ' || ' separator (blank spacer lines) are skipped.
        return [(fields[0], fields[1])
                for fields in (line.split('||') for line in log_lines)
                if len(fields) > 1]
    def hard_reset(self, commit_hash):
        """Force the working tree to the state of `commit_hash`."""
        command = 'git --git-dir={0} --work-tree={1} reset --hard {2}'.format(
            self.git_dir, self.working_dir, commit_hash)
        self.executor.execute(command)
class CheckstyleParser:
    """Turns a checkstyle XML report into a ToxicityReport."""
    def parse(self, checkstyle_report_content):
        """Split the report's <file> elements into healthy and toxic classes."""
        document = parseString(checkstyle_report_content)
        checkstyle_root = document.getElementsByTagName('checkstyle')[0]
        healthy = []
        toxic = []
        for file_node in checkstyle_root.getElementsByTagName('file'):
            error_nodes = file_node.getElementsByTagName('error')
            if error_nodes:
                # One entry per checkstyle rule; a later error for the same
                # rule overwrites the earlier message, as in a plain dict fill.
                messages = dict((err.getAttribute('source'), err.getAttribute('message'))
                                for err in error_nodes)
                toxic.append(ToxicClass(file_node.getAttribute('name'), messages))
            else:
                healthy.append(file_node.getAttribute('name'))
        return ToxicityReport(healthy, toxic)
class CheckstyleExecution:
    """Runs the bundled checkstyle jar over a source tree, returning the XML report."""
    def __init__(self, executor, path_to_install):
        self.executor = executor
        self.path_to_install = path_to_install
    def analyse(self, src_directory):
        """Return checkstyle's XML output for `src_directory` as a string."""
        jar = '%s/tools/checkstyle/checkstyle-all-4.4.jar' % self.path_to_install
        rules = '%s/tools/checkstyle/metrics.xml' % self.path_to_install
        command = 'java -jar %s -c %s -r %s -f xml' % (jar, rules, src_directory)
        return self.executor.execute(command).read()
class ToxicClass:
    """A class flagged by checkstyle, with its rule-source -> message errors."""
    def __init__(self, class_name, errors):
        # Bug fix: `class_name` was previously accepted but never stored, so a
        # ToxicClass could not report WHICH class was toxic. Storing it is
        # backward-compatible (pure addition of an attribute).
        self.class_name = class_name
        # Maps checkstyle check source (rule id) -> violation message.
        self.errors = errors
class ToxicityReport:
    """Summary of a checkstyle run: names of clean classes and the toxic ones."""
    def __init__(self, healthy_class_names, unhealthy_class_names):
        self.healthy_class_names = healthy_class_names
        self.unhealthy_class_names = unhealthy_class_names
    def number_of_healthy_classes(self):
        """Count of classes with no checkstyle errors."""
        return len(self.healthy_class_names)
    # Backward-compatible alias preserving the original misspelled method name.
    number_of_healty_classes = number_of_healthy_classes
    def number_of_unhealthy_classes(self):
        """Count of classes with at least one checkstyle error."""
        return len(self.unhealthy_class_names)
class ToxicityCalculator():
    """Scores checkstyle violations: each violation costs actual/allowed.

    Only the three checks registered in `handlers` are scorable; an unknown
    check source raises KeyError from `toxicity`.
    """
    def __init__(self):
        # Dispatch table: checkstyle rule id -> message-parsing cost function.
        self.handlers = {
            'com.puppycrawl.tools.checkstyle.checks.sizes.MethodLengthCheck' : self.calculate_long_method_length_cost,
            'com.puppycrawl.tools.checkstyle.checks.sizes.FileLengthCheck' : self.calculate_long_class_cost,
            'com.puppycrawl.tools.checkstyle.checks.metrics.ClassDataAbstractionCouplingCheck' : self.calculate_abstraction_coupling_cost}
    def calculate_abstraction_coupling_cost(self, message_string):
        """Cost of a ClassDataAbstractionCoupling violation message."""
        actual, allowed = self.matches('Class Data Abstraction Coupling is (\d*) \(max allowed is (\d*)\)', message_string)
        return self.cost(actual, allowed)
    def calculate_long_method_length_cost(self, message_string):
        """Cost of a MethodLength violation message."""
        actual, allowed = self.matches('Method length is (\d*) lines \(max allowed is (\d*)\).', message_string)
        return self.cost(actual, allowed)
    def calculate_long_class_cost(self, message_string):
        """Cost of a FileLength violation message."""
        actual, allowed = self.matches('File length is (\d*) lines \(max allowed is (\d*)\)', message_string)
        return self.cost(actual, allowed)
    def matches(self, pattern, string):
        """Return the capture groups of `pattern` found in `string`.

        Raises AttributeError (None.groups) when the pattern does not match.
        """
        found = re.search(pattern, string)
        return found.groups()
    def toxicity(self, errors):
        """Sum the cost of every error (rule id -> message), rounded down to 2 dp."""
        total = sum((self.handlers[source](message) for source, message in errors.items()),
                    Decimal(0))
        return self.round_down(total)
    def cost(self, actual, allowed):
        """Ratio of the actual value to the allowed maximum, as a Decimal."""
        return Decimal(actual) / Decimal(allowed)
    def round_down(self, decimal):
        """Truncate (never round up) to two decimal places."""
        return decimal.quantize(Decimal('.01'), rounding=ROUND_DOWN)
class SkippingAnalyser:
    """Samples history: forwards only every Nth commit to the real analyser."""
    def __init__(self, skipping_commits, delegate_analyser, git_repo):
        self.skipping_commits = skipping_commits
        self.delegate_analyser = delegate_analyser
        self.git_repo = git_repo
        self.current_count = 0
    def analyse(self, commit_hash, commit_date):
        """Count the call; on the Nth, check out the commit and delegate."""
        self.current_count += 1
        if self.current_count != self.skipping_commits:
            return  # not yet at the sampling interval
        self.git_repo.hard_reset(commit_hash)
        self.delegate_analyser.analyse(commit_hash, commit_date)
        self.current_count = 0
class ClocParser:
    """Parses cloc's CSV output into a per-commit metrics accumulator."""
    def create_record(self, src_dir, by_date_count, cloc_line):
        """Record one CSV data row's language/LOC count; returns the accumulator."""
        fields = cloc_line.split(',')
        if len(fields) < 7:
            raise Exception('Cannot parse line "' + cloc_line + '"')
        # cloc CSV layout: files, language, blank, comment, code, scale, 3rd-gen equiv.
        language = fields[1]
        lines_of_code = fields[4]
        by_date_count.add_record(src_dir, language, lines_of_code)
        return by_date_count
    def parse(self, commit_date, commit_hash, src_directory_name, cloc_output):
        """Fold every data row of `cloc_output` into one MetricsForCommit."""
        metrics = MetricsForCommit(commit_date, commit_hash)
        for row in cloc_output.split('\n'):
            # Skip the header row (contains 'files') and blank/whitespace rows.
            if 'files' in row:
                continue
            if not row or row.isspace():
                continue
            metrics = self.create_record(src_directory_name, metrics, row)
        return metrics
class TsvFormattingStore:
    """Accumulates per-commit metrics and renders them as a tab-separated table.

    Note: despite the name `as_csv`, the output is tab-delimited (TSV), matching
    the .tsv file it is written to by line_counts().
    """
    def __init__(self):
        # Maps commit hash -> MetricsForCommit; duplicates are merged in store().
        self.records_by_commit = {}
    def store(self, metrics_for_commit):
        """Add a record, merging it into any existing record for the same commit."""
        commit = metrics_for_commit.commit
        # Fix: dict.has_key() is Python-2-only (removed in Python 3); the `in`
        # operator is behaviourally identical on both.
        if commit in self.records_by_commit:
            old_record = self.records_by_commit[commit]
            old_record.merge(metrics_for_commit)
        else:
            self.records_by_commit[commit] = metrics_for_commit
    def metrics_to_report(self):
        """Return {src_dir: set of metric names} seen across all stored commits."""
        metrics_to_report = {}
        for record in self.records_by_commit.values():
            for src_dir in record.src_dirs.keys():
                metrics_for_dir = metrics_to_report.get(src_dir, set())
                for metric in record.src_dirs[src_dir].keys():
                    metrics_for_dir.add(metric)
                metrics_to_report[src_dir] = metrics_for_dir
        return metrics_to_report
    def create_row_header(self, metrics_to_report):
        """Build the header row: 'Date' plus one '<src_dir>-<metric>' column each."""
        row_header = 'Date'
        for src_dir in metrics_to_report.keys():
            for language in metrics_to_report[src_dir]:
                row_header = row_header + '\t' + src_dir + '-' + language
        row_header = row_header + '\n'
        return row_header
    def as_csv(self):
        """Render all stored records as one TSV string; missing counts become 0."""
        metrics_to_report = self.metrics_to_report()
        row_header = self.create_row_header(metrics_to_report)
        for record in self.records_by_commit.values():
            row_header = row_header + record.date
            for src_dir in metrics_to_report.keys():
                for metric in metrics_to_report[src_dir]:
                    row_header = row_header + '\t' + str(record.src_dirs.get(src_dir, {}).get(metric, 0))
            row_header = row_header + '\n'
        return row_header
class LinesOfCodeAnalyser:
    """Counts lines of code at a commit (via cloc) and stores the result."""
    def __init__(self, abs_src_directory, running_from, data_store, parser = ClocParser(), executor = Executor()):
        # `running_from` is the tool's install directory (location of tools/cloc-1.08.pl).
        self.executor = executor
        self.parser = parser
        self.running_from = running_from
        self.abs_src_directory = abs_src_directory
        self.data_store = data_store
    def analyse(self, commit_hash, commit_date):
        """Run cloc over the source directory and store the parsed counts."""
        command = 'perl %s/tools/cloc-1.08.pl %s --csv --exclude-lang=CSS,HTML,XML --quiet' % (
            self.running_from, self.abs_src_directory)
        cloc_csv = self.executor.execute(command).read()
        parsed = self.parser.parse(commit_date, commit_hash, self.abs_src_directory, cloc_csv)
        self.data_store.store(parsed)
class CompositeAnalyser:
    """Fans one analyse() call out to every delegate analyser, in order."""
    def __init__(self, delegates):
        self.delegates = delegates
    def analyse(self, commit_hash, commit_date):
        """Invoke each delegate's analyse() with the same commit and date."""
        for analyser in self.delegates:
            analyser.analyse(commit_hash, commit_date)
class MetricsForCommit:
    """Metric counts for one commit, keyed by source dir then metric name."""
    def __init__(self, date, commit):
        self.date = date
        self.commit = commit
        # {src_dir: {metric_name: int count}}
        self.src_dirs = {}
    def add_record(self, src_dir, metric, count):
        """Store `count` (coerced to int) under src_dir/metric."""
        self.src_dirs.setdefault(src_dir, {})[metric] = int(count)
    def merge(self, other_by_date_count):
        """Absorb another record for the SAME commit; its src dirs overwrite ours."""
        if other_by_date_count.commit != self.commit:
            raise Exception('Can only merge records with same commit')
        for src_dir, counts in other_by_date_count.src_dirs.items():
            self.src_dirs[src_dir] = counts
def generate_commit_list(location_for_files, git_repo):
    """Dump the repo's history to <location>/commits.out and return (hash, date) pairs."""
    return git_repo.commits(location_for_files + "/commits.out")
def line_counts(location_for_results, sample_rate, src_dirs, git_dir, working_dir):
    # Walks the repo's history, sampling every `sample_rate`-th commit, runs cloc
    # over each src dir at that commit, and writes a TSV of line counts over time.
    data = open(location_for_results + "/line_count_by_time.tsv", 'w')
    git_repo = GitRepo(git_dir, working_dir, Executor())
    commit_list = generate_commit_list(location_for_results, git_repo)
    # Remember the current HEAD so the working tree can be restored at the end.
    head = git_repo.current_head()
    store = TsvFormattingStore()
    # One LOC analyser per source directory, all feeding the same store.
    delegate = CompositeAnalyser([LinesOfCodeAnalyser(src_dir, RUNNING_FROM, store) for src_dir in src_dirs])
    skipping_analyser = SkippingAnalyser(skipping_commits = sample_rate, delegate_analyser = delegate, git_repo = git_repo)
    for commit in commit_list:
        # commit is a (hash, date) tuple from GitRepo.commits.
        date = commit[1]
        git_commit = commit[0]
        skipping_analyser.analyse(git_commit, date)
    data.write(store.as_csv())
    print "Resetting to " + head
    # Restore the working tree to where the user left it.
    git_repo.hard_reset(head)
    print data.name
    data.close()
def to_gnuplot(data_table):
    """Build a gnuplot 'plot' command from the TSV header row of `data_table`."""
    series_names = data_table.split('\n')[0].split('\t')[1:]
    # Data series start at gnuplot column 4: the ISO date field occupies
    # whitespace-separated columns 1-3 (date, time, timezone).
    clauses = ['"line_count_by_time.tsv" using 1:%d title "%s", ' % (column, name)
               for column, name in enumerate(series_names, start=4)]
    return 'plot ' + ''.join(clauses)
def execution_path(filename):
    # Resolve the directory this tool is installed in, by locating the source
    # file of the CALLER (sys._getframe(1)) and trimming a trailing 'run.sh'.
    # NOTE(review): the `filename` parameter is unused — 'run.sh' is hard-coded
    # below; presumably they were meant to be the same value. Confirm before
    # relying on passing anything else.
    execution_path = os.path.join(os.path.dirname(inspect.getfile(sys._getframe(1))), 'run.sh')
    path_to_run = os.path.abspath(execution_path)
    if path_to_run.endswith('run.sh'):
        # Drop the 6-character 'run.sh' suffix, keeping the trailing slash.
        index_of_run = len(path_to_run) - 6
        path_to_run = path_to_run[:index_of_run]
    print "Using " + path_to_run
    return path_to_run
# Install directory of the tool; used to locate bundled tools (cloc, checkstyle).
RUNNING_FROM = execution_path('run.sh')
def pwd():
    """Return the current working directory, as reported by the `pwd` shell command."""
    output = Executor().execute('pwd')
    return output.read().strip()
def main(argv=None):
    # Command-line entry point: parse options, then run the line-count analysis.
    if argv is None:
        argv = sys.argv
    parser = OptionParser()
    parser.add_option("-r", "--results_dir", action="store", dest="result_dir", type="string", default=".", help="Location where results will be stored")
    parser.add_option("-s", "--source_dir", action="store", dest="src_dirs", type="string", default="src", help="A comma seperated list of directories to parse")
    parser.add_option("-f", "--frequency_of_sample", action="store", dest="sample_rate", default=100, type="int", help="How often should a sample be made")
    parser.add_option("-g", "--git_repo_dir", action="store", dest="git_repo_dir", default=pwd()+'/.git', type="string", help="The directory containing the .git file")
    parser.add_option("-w", "--working_dir", action="store", dest="working_dir", default=pwd(), type="string", help="Where will files be checked out to for line counts etc")
    (options, args) = parser.parse_args(argv)
    results_dir = options.result_dir
    sample_rate = options.sample_rate
    # src_dirs is a comma-separated string; split into a list before use.
    src_dirs_str = options.src_dirs
    git_dir = options.git_repo_dir
    working_dir = options.working_dir
    print "Using a sample rate of " + str(sample_rate) + " reading from files " + str(src_dirs_str)
    line_counts(results_dir, sample_rate, src_dirs_str.split(','), git_dir, working_dir)
if __name__ == "__main__":
    main()