forked from cerati/p2z-tests
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript.py
More file actions
212 lines (182 loc) · 8.28 KB
/
script.py
File metadata and controls
212 lines (182 loc) · 8.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#!/bin/env python
"""TAU trial data for TAU Profile.x.y.z format profiles
Parses a set of TAU profile files and yields multi-indexed Pandas dataframes for the
interval and atomic events.
"""
from __future__ import print_function
import csv
import glob
import mmap
import os
import re
import xml.etree.ElementTree as ElementTree
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sys import stderr
import pandas
import sys
# NOTE(review): module-level constant, apparently a thread-count knob, but it is
# not referenced anywhere in the visible code — confirm before removing.
NTHREADS = 40
class TauProfileParser(object):
    """Parser for TAU's profile.* format.

    Reads a directory of ``profile.N.C.T`` files and exposes the interval
    (timer) records and atomic (user event) records as pandas DataFrames
    multi-indexed by (Node, Context, Thread, Timer).
    """

    # Interval-section header: "<count> templated_functions_MULTI_<metric>"
    # FIX: rb'...' raw byte strings replace escaped '\\d' for clarity.
    _interval_header_re = re.compile(rb'(\d+) templated_functions_MULTI_(.+)')
    # Atomic-section header: "<count> userevents"
    _atomic_header_re = re.compile(rb'(\d+) userevents')

    def __init__(self, trial, metric, metadata, indices, interval_data, atomic_events):
        """Store parsed trial artifacts.

        :param trial: opaque trial identifier supplied by the caller (may be None)
        :param metric: metric name taken from the first profile header (bytes)
        :param metadata: dict of TAU <metadata> attributes from the first profile
        :param indices: list of (node, context, thread) tuples, one per file
        :param interval_data: DataFrame of timer records
        :param atomic_events: DataFrame of user-event records
        """
        self.trial = trial
        self.metric = metric
        self.metadata = metadata
        self.indices = indices
        self._interval_data = interval_data
        self._atomic_data = atomic_events

    def interval_data(self):
        """Return the interval (timer) DataFrame."""
        return self._interval_data

    def atomic_data(self):
        """Return the atomic (user event) DataFrame."""
        return self._atomic_data

    def get_value_types(self):
        """Return the names of the numeric (float64/int64) interval columns."""
        # FIX: the original rebuilt dict(self._interval_data.dtypes) on every
        # iteration of the comprehension; compute it once.
        dtypes = dict(self._interval_data.dtypes)
        return [key for key, dtype in dtypes.items() if dtype in ['float64', 'int64']]

    def summarize_samples(self, across_threads=False, callpaths=True):
        """Aggregate sample-based timers into '[SUMMARY] '-prefixed rows.

        :param across_threads: if True, group by timer name only; otherwise keep
            the per-(Node, Context, Thread) breakdown.
        :param callpaths: if True, select rows whose Group contains TAU_SAMPLE
            (includes callpath entries); otherwise only rows whose Timer Type
            is exactly 'SAMPLE'.
        :return: summed DataFrame with '[SUMMARY] ' prepended to timer names.
        """
        groups = 'Timer Name' if across_threads else ['Node', 'Context', 'Thread', 'Timer Name']
        if callpaths:
            base_data = self._interval_data.loc[self._interval_data['Group'].str.contains("TAU_SAMPLE")]
        else:
            base_data = self._interval_data.loc[self._interval_data['Timer Type'] == 'SAMPLE']
        summary = base_data.groupby(groups).sum()
        summary.index = summary.index.map(
            lambda x: '[SUMMARY] ' + x if across_threads else (x[0], x[1], x[2], '[SUMMARY] ' + x[3]))
        return summary

    def summarize_allocations(self):
        """Return Count/Mean/Total per timer for events whose name contains 'alloc'."""
        sums = self.atomic_data().groupby('Timer').agg({'Count': 'sum', 'Mean': 'mean'})
        allocs = sums[sums.index.to_series().str.contains('alloc')][['Count', 'Mean']]
        allocs['Total'] = allocs['Count'] * allocs['Mean']
        return allocs

    @classmethod
    def _parse_header(cls, fin):
        """Read one interval header line from *fin*; return (interval_count, metric)."""
        match = cls._interval_header_re.match(fin.readline())
        interval_count, metric = match.groups()
        return int(interval_count), metric

    @classmethod
    def _parse_metadata(cls, fin):
        """Read the column-description line and parse the embedded <metadata> XML.

        :raises RuntimeError: if the field list is unrecognized or the XML is invalid.
        :return: dict mapping metadata attribute names to their text values.
        """
        fields, xml_wanabe = fin.readline().split(b'<metadata>')
        xml_wanabe = b'<metadata>' + xml_wanabe  # restore the tag consumed by split()
        if (fields != b"# Name Calls Subrs Excl Incl ProfileCalls" and
                fields != b'# Name Calls Subrs Excl Incl ProfileCalls # '):
            # FIX: fin is usually an mmap, which has no .name attribute; the
            # original raised AttributeError here instead of RuntimeError.
            raise RuntimeError('Invalid profile file: %s' % getattr(fin, 'name', '<unknown>'))
        try:
            metadata_tree = ElementTree.fromstring(xml_wanabe)
        except ElementTree.ParseError as err:
            raise RuntimeError('Invalid profile file: %s' % err)
        metadata = {}
        for attribute in metadata_tree.iter('attribute'):
            name = attribute.find('name').text
            value = attribute.find('value').text
            metadata[name] = value
        return metadata

    @classmethod
    def _parse_interval_data(cls, fin, count):
        # Placeholder: interval rows are read directly via pandas.read_table in parse().
        pass

    @classmethod
    def _parse_atomic_header(cls, fin):
        """Consume the aggregates/userevents header lines; return the event count.

        Returns 0 when the profile has no "<n> userevents" line.
        """
        aggregates = fin.readline().split(b' aggregates')[0]
        if aggregates != b'0':
            print("aggregates != 0, is '%s'" % aggregates, file=stderr)
        match = cls._atomic_header_re.match(fin.readline())
        try:
            count = int(match.group(1))
            if fin.readline() != b"# eventname numevents max min mean sumsqr\n":
                # FIX: mmap has no .name; the original's AttributeError here was
                # swallowed by the except below, silently returning count=0 for
                # an invalid file instead of raising RuntimeError.
                raise RuntimeError('Invalid profile file: %s' % getattr(fin, 'name', '<unknown>'))
        except AttributeError:
            # match is None: no userevents section in this profile.
            count = 0
        return count

    @staticmethod
    def extract_from_timer_name(name):
        """Split a TAU timer name into (function name, source location, timer type).

        A leading '[TYPE] ' tag yields timer_type TYPE; a trailing
        '[{file} {line}]' suffix yields the location; absent parts are None.
        """
        # FIX: raw strings replace invalid escape sequences ('\[', '\w', '\s',
        # '\d' in non-raw literals warn on modern Python); the redundant
        # function-local 'import re' was removed (re is imported at module level).
        tag_search = re.search(r'^\[(\w+)\]\s+(.*)', name)
        timer_type, rest = tag_search.groups() if tag_search else (None, name)
        name_search = re.search(r'(.+)\[({.*)\]', rest)
        func_name, location = name_search.groups() if name_search else (rest, None)
        return func_name, location, timer_type

    @classmethod
    def parse(cls, dir_path, filenames=None, trial=None):
        """Parse all profile.* files under *dir_path*; return a TauProfileParser.

        :param dir_path: directory containing profile.N.C.T files
        :param filenames: optional explicit list of file names (defaults to a glob)
        :param trial: opaque trial identifier stored on the result
        """
        if not os.path.isdir(dir_path):
            print("Error: %s is not a directory." % dir_path, file=stderr)
            sys.exit(1)
        intervals = []
        atomics = []
        indices = []
        trial_data_metric = None
        trial_data_metadata = None
        if filenames is None:
            filenames = [os.path.basename(x) for x in glob.glob(os.path.join(dir_path, 'profile.*'))]
        if not filenames:
            print("Error: No profile files found.")
            sys.exit(1)
        # Natural sort so profile.0.0.10 follows profile.0.0.9.
        for filename in sorted(filenames,
                               key=lambda s: [int(t) if t.isdigit() else t.lower()
                                              for t in re.split(r'(\d+)', s)]):
            location = os.path.basename(filename).replace('profile.', '')
            node, context, thread = (int(x) for x in location.split('.'))
            file_path = os.path.join(dir_path, filename)
            with open(file_path, 'rb') as fin:
                mm = mmap.mmap(fin.fileno(), 0, access=mmap.ACCESS_READ)
                interval_count, metric = cls._parse_header(mm)
                if not trial_data_metric:
                    trial_data_metric = metric
                metadata = cls._parse_metadata(mm)
                if not trial_data_metadata:
                    trial_data_metadata = metadata
                # FIX: sep=r'\s+' replaces delim_whitespace=True, which is
                # deprecated (removed in pandas 3.0); '\s+' still uses the C engine.
                interval = pandas.read_table(mm, nrows=interval_count, sep=r'\s+',
                                             names=['Calls', 'Subcalls', 'Exclusive',
                                                    'Inclusive', 'ProfileCalls', 'Group'],
                                             engine='c')
                # The timer name lands in the row index; split it into components.
                split_index = interval.reset_index()['index'].apply(cls.extract_from_timer_name)
                for n, col in enumerate(['Timer Name', 'Timer Location', 'Timer Type']):
                    interval[col] = split_index.apply(lambda l: l[n]).values
                # Rewind and skip the interval section to reach the atomic events.
                mm.seek(0)
                for i in range(0, interval_count + 2):
                    mm.readline()
                cls._parse_atomic_header(mm)
                atomic = pandas.read_table(mm, names=['Count', 'Maximum', 'Minimum', 'Mean', 'SumSq'],
                                           sep=r'\s+', engine='c')
                mm.close()
            intervals.append(interval)
            atomics.append(atomic)
            indices.append((node, context, thread))
        interval_df = pandas.concat(intervals, keys=indices)
        interval_df.index.rename(['Node', 'Context', 'Thread', 'Timer'], inplace=True)
        atomic_df = pandas.concat(atomics, keys=indices)
        atomic_df.index.rename(['Node', 'Context', 'Thread', 'Timer'], inplace=True)
        return cls(trial, trial_data_metric, trial_data_metadata, indices, interval_df, atomic_df)
if __name__ == "__main__":
    # Expected invocation: script.py <profile_dir> <function_name>
    # FIX: the original accepted a single argument (defaulting path='.') but
    # never assigned functionName, crashing with NameError at the first use;
    # its usage branch also printed sys.argv[1]/sys.argv[2], which do not exist
    # when fewer than three arguments are given (IndexError). Require both.
    if len(sys.argv) == 3:
        path = sys.argv[1]
        functionName = sys.argv[2]
    else:
        print("Usage: %s <path> <functionName>" % sys.argv[0])
        sys.exit(1)
    print(functionName)
    data = TauProfileParser.parse(path)
    # Round-trip the Exclusive column through CSV so the multi-index comes back
    # as ordinary columns (giving a flat 'Timer' column to filter on).
    dataframe = data.interval_data()
    dataframe = dataframe['Exclusive']
    dataframe.to_csv('data.csv')
    data = pd.read_csv("data.csv")
    # NOTE(review): the pattern is used as a regex, so the leading '.' matches
    # any character — presumably a literal dot was intended; confirm.
    function_df = data.loc[data['Timer'].str.contains('.TAU application ')]
    function_df = function_df.loc[function_df['Timer'].str.contains(functionName)]
    if functionName == "KalmanGain":
        # 'KalmanGain' also matches 'KalmanGainInv' timers; drop those rows.
        function_df = function_df.drop(
            function_df[function_df['Timer'].str.contains('KalmanGainInv')].index)
    # Derive an output tag from the third path component, stripping any '_TBB'.
    # FIX: guard against paths with fewer than three components (IndexError).
    parts = path.split("/")
    tag = parts[2] if len(parts) > 2 else parts[-1]
    if '_TBB' in tag:
        tag = tag.replace('_TBB', '')
    print(len(function_df['Exclusive']))
    # FIX: ensure the output directory exists before appending to the file.
    os.makedirs('Data', exist_ok=True)
    function_df['Exclusive'].to_csv('Data/Exclusive_Count_Data_' + tag + '_' + functionName + '.csv',
                                    mode='a', index=False)