# computations.py
# (GitHub web-UI scrape residue removed here: navigation text and the copied
# line-number gutter, which were not part of the original source file.)
from factors import Shape, Coupling
# NOTE(review): Coupling's positional arguments appear to be, in order:
# (all dimensions, input-operand dims, weight dims, output dims) — inferred
# from the MAC equations below; confirm against factors.Coupling.
# A nested list such as ['P', 'R'] marks dimensions whose (strided) sum
# indexes the operand, e.g. In[...][p+r][...].
# DIMENSIONS and COUPLING for GEMMS:
# M: Weight/Out rows
# K: Inner dimension, Weight cols/In rows
# N: In/Out cols
# ==> MAC: Out[m][n] += W[m][k] * In[k][n]
gemm_coupling = Coupling(['M', 'K', 'N'], ['K', 'N'], ['M', 'K'], ['M', 'N'])
# DIMENSIONS and COUPLING for CONVOLUTIONS:
# M: Filter num/Out depth
# P: Out height
# Q: Out width
# C: Filter/Input depth
# R: Filter height
# S: Filter width
# => P+R-1: Input height
# => Q+S-1: Input width
# ==> MAC: Out[m][p][q] += W[m][c][r][s] * In[c][p+r][q+s]
conv_coupling = Coupling(['M', 'P', 'Q', 'C', 'R', 'S'], ['C', ['P', 'R'], ['Q', 'S']], ['M', 'C', 'R', 'S'], ['M', 'P', 'Q'])
# WITH STRIDE the indexing becomes:
# => Pstride*P+Rdilation*R-1: Input height
# => Qstride*Q+Sdilation*S-1: Input width
# ==> MAC: Out[m][p][q] += W[m][c][r][s] * In[c][p*Pstride+r*Rdilation][q*Qstride+s*Sdilation]
# (in_strides names the Shape attribute that scales each input-coupled dimension)
conv_coupling_with_stride = Coupling(['M', 'P', 'Q', 'C', 'R', 'S'], ['C', ['P', 'R'], ['Q', 'S']], ['M', 'C', 'R', 'S'], ['M', 'P', 'Q'], in_strides = {'P': 'Pstride', 'R': 'Rdilation', 'Q': 'Qstride', 'S': 'Sdilation'})
# WITH BATCHES too we get:
# N: Batch size
# ==> MAC: Out[n][m][p][q] += W[m][c][r][s] * In[n][c][p*Pstride+r*Rdilation][q*Qstride+s*Sdilation]
conv_coupling_with_stride_and_batches = Coupling(['N', 'M', 'P', 'Q', 'C', 'R', 'S'], ['N', 'C', ['P', 'R'], ['Q', 'S']], ['M', 'C', 'R', 'S'], ['N', 'M', 'P', 'Q'], in_strides = {'P': 'Pstride', 'R': 'Rdilation', 'Q': 'Qstride', 'S': 'Sdilation'})
# In a TRANSPOSED CONVOLUTION DIMENSIONS become:
# P: Input height
# Q: Input width
# => P+R-1: Out height
# => Q+S-1: Out width
# ==> MAC: Out[m][p+r][q+s] += W[m][c][r][s] * In[c][p][q] (stride and dilation omitted for clarity)
# (note: here the strided sum indexes the OUTPUT, hence out_strides)
transposed_conv_coupling = Coupling(['M', 'P', 'Q', 'C', 'R', 'S'], ['C', 'P', 'Q'], ['M', 'C', 'R', 'S'], ['M', ['P', 'R'], ['Q', 'S']], out_strides = {'P': 'Pstride', 'R': 'Rdilation', 'Q': 'Qstride', 'S': 'Sdilation'})
# WITH BATCHES too we get:
# N: Batch size
# ==> MAC: Out[n][m][p+r][q+s] += W[m][c][r][s] * In[n][c][p][q] (stride and dilation omitted for clarity)
transposed_conv_coupling_with_batches = Coupling(['N', 'M', 'P', 'Q', 'C', 'R', 'S'], ['N', 'C', 'P', 'Q'], ['M', 'C', 'R', 'S'], ['N', 'M', ['P', 'R'], ['Q', 'S']], out_strides = {'P': 'Pstride', 'R': 'Rdilation', 'Q': 'Qstride', 'S': 'Sdilation'})
# NOTE: each comp must be strictly compatible with its coupling, that is, it must assign a value to each of the coupling's dimensions.
# Then, the comp's coupling may happen to be a subcoupling of the one used to define the current architecture.
def comp_BERT(embedding : int, seq_length : int, heads : int, ff_dim : int) -> dict[str, Shape]:
    """
    Generates computation instances for each GEMM of a BERT Transformer
    with arbitrary parameters/dimensions. See:
    "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding"

    (Fix: this text used to sit ABOVE the def as a bare module-level string,
    which is a no-op expression — comp_BERT.__doc__ was None. It is now a
    proper docstring.)

    Arguments:
    - embedding: hidden/embedding dimension; must be divisible by 'heads'.
    - seq_length: number of tokens in the processed sequence.
    - heads: number of attention heads.
    - ff_dim: inner dimension of the feed-forward block.

    Returns a dict mapping a name to each GEMM's Shape:
    - 'KQV': fused key/query/value projection (hence M = embedding*3);
    - 'KTQ': attention scores for one head (inner dim = embedding//heads);
    - 'VScores': attention-weighted values for one head;
    - 'Out': output projection after head concatenation;
    - 'FF1'/'FF2': the two feed-forward GEMMs (expand to/contract from ff_dim).
    """
    # Kept as an assert (not a raise) so callers' exception expectations are
    # unchanged; note asserts are stripped under `python -O`.
    assert embedding % heads == 0, f"Embedding dim ({embedding}) must be divisible by the number of heads ({heads})."
    return {
        'KQV': Shape(
            M = embedding*3,
            K = embedding,
            N = seq_length
        ),
        'KTQ': Shape(
            M = seq_length,
            K = embedding//heads,
            N = seq_length
        ),
        'VScores': Shape(
            M = embedding//heads,
            K = seq_length,
            N = seq_length
        ),
        'Out': Shape(
            M = embedding,
            K = embedding,
            N = seq_length
        ),
        'FF1': Shape(
            M = ff_dim,
            K = embedding,
            N = seq_length
        ),
        'FF2': Shape(
            M = embedding,
            K = ff_dim,
            N = seq_length
        )
    }
# Pre-built BERT workloads.
# NOTE(review): the (embedding, heads, ff_dim) triples match BERT-base
# (768, 12, 3072) and BERT-large (1024, 16, 4096) from the paper, but the
# sequence lengths 1024/4096 exceed the paper's 512 — presumably a deliberate
# benchmark choice; confirm with the experiment setup.
comp_BERT_base = comp_BERT(768, 1024, 12, 3072)
comp_BERT_large = comp_BERT(1024, 4096, 16, 4096)
# Synthetic GEMMs whose dimensions factorize awkwardly (stress tests for the
# mapper's factor allocation).
# NOTE: the historical names misspell "factors" as "factos"; they are kept
# unchanged for backward compatibility, with correctly-spelled aliases below.
comp_harsh_factos_1 = Shape(
    M = 4000,
    K = 6032,
    N = 12000
)
comp_harsh_factos_2 = Shape(
    M = 7000,
    K = 1440,
    N = 4224
)
# GEMM whose dimensions lack convenient divisors, forcing padding in mappers
# that require exact tiling.
comp_requiring_padding = Shape(
    M = 4037,
    K = 6011,
    N = 12071
)
# Correctly-spelled aliases (same objects); prefer these in new code.
comp_harsh_factors_1 = comp_harsh_factos_1
comp_harsh_factors_2 = comp_harsh_factos_2
"""
GEMMs coming from scientific applications, taken from previous literature:
"Evaluating Spatial Accelerator Architectures with Tiled Matrix-Matrix Multiplication"
"""
# Raw (M, K, N) sizes per benchmark; expanded into Shape instances just below.
_maestro_blas_dims = {
    'MB1': (8192, 8192, 8192),
    'MB2': (1024, 8192, 1024),
    'MB3': (8, 8192, 8),
    'MB4': (8, 1024, 8192),
    'MB5': (8192, 1024, 8),
    'MB6': (512, 256, 256),
}
comp_maestro_blas = {name: Shape(M = m, K = k, N = n) for name, (m, k, n) in _maestro_blas_dims.items()}
"""
Convolutions from the layers of VGG16. See:
"Very Deep Convolutional Networks for Large-Scale Image Recognition"
"""
# Per-layer (C, M, P, Q, R, S). Layers whose shape repeats an earlier one
# (L6, L9, L11, L12) are omitted, as in the original table. L13-L15 are the
# fully-connected layers expressed as 1x1 convolutions, and 'L3+' is a
# large-filter (9x9) experiment variant of L3.
_vgg_16_dims = {
    'L0': (3, 64, 224, 224, 3, 3),
    'L1': (64, 64, 224, 224, 3, 3),
    'L2': (64, 128, 112, 112, 3, 3),
    'L3': (128, 128, 112, 112, 3, 3),
    'L4': (128, 256, 56, 56, 3, 3),
    'L5': (256, 256, 56, 56, 3, 3),
    'L7': (256, 512, 28, 28, 3, 3),
    'L8': (512, 512, 28, 28, 3, 3),
    'L10': (512, 512, 14, 14, 3, 3),
    'L13': (25088, 4096, 1, 1, 1, 1),  # fully connected
    'L14': (4096, 4096, 1, 1, 1, 1),   # fully connected
    'L15': (4096, 1000, 1, 1, 1, 1),   # fully connected
    'L3+': (128, 128, 112, 112, 9, 9), # large filter experiment
}
comp_vgg_16 = {name: Shape(C = c, M = m, P = p, Q = q, R = r, S = s) for name, (c, m, p, q, r, s) in _vgg_16_dims.items()}
"""
Convolutions from the layers of ResNet18.
"""
# Shapes follow conv_coupling_with_stride: P/Q are the OUTPUT height/width,
# so the input extent is Pstride*P + Rdilation*R - 1 (resp. Q/S).
# Layers whose shape repeats an earlier one (L2-L4, L8-L9, L13-L14, L18-L19)
# are commented out rather than duplicated — presumably to avoid evaluating
# identical workloads twice; TODO(review): confirm.
# NOTE(review): L10 (128->128) followed by L11 (256->256) looks inconsistent
# with ResNet18's stage widening (128->256 at the 14x14 stage); verify these
# two entries against the model definition.
comp_resnet_18 = {
    'L0': Shape(C = 3, M = 64, P = 112, Q = 112, R = 7, S = 7, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1),
    'L1': Shape(C = 64, M = 64, P = 56, Q = 56, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    #'L2': Shape(C = 64, M = 64, P = 56, Q = 56, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    #'L3': Shape(C = 64, M = 64, P = 56, Q = 56, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    #'L4': Shape(C = 64, M = 64, P = 56, Q = 56, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'L5': Shape(C = 64, M = 128, P = 28, Q = 28, R = 3, S = 3, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1),
    'L6': Shape(C = 128, M = 128, P = 28, Q = 28, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'L7': Shape(C = 64, M = 128, P = 28, Q = 28, R = 1, S = 1, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1), # point-wise
    #'L8': Shape(C = 128, M = 128, P = 28, Q = 28, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    #'L9': Shape(C = 128, M = 128, P = 28, Q = 28, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'L10': Shape(C = 128, M = 128, P = 14, Q = 14, R = 3, S = 3, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1),
    'L11': Shape(C = 256, M = 256, P = 14, Q = 14, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'L12': Shape(C = 128, M = 256, P = 14, Q = 14, R = 1, S = 1, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1), # point-wise
    #'L13': Shape(C = 256, M = 256, P = 14, Q = 14, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    #'L14': Shape(C = 256, M = 256, P = 14, Q = 14, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'L15': Shape(C = 256, M = 512, P = 7, Q = 7, R = 3, S = 3, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1),
    'L16': Shape(C = 512, M = 512, P = 7, Q = 7, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'L17': Shape(C = 256, M = 512, P = 7, Q = 7, R = 1, S = 1, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1), # point-wise
    #'L18': Shape(C = 512, M = 512, P = 7, Q = 7, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    #'L19': Shape(C = 512, M = 512, P = 7, Q = 7, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'L20': Shape(C = 512, M = 1000, P = 1, Q = 1, R = 1, S = 1, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1), # fully connected
    'L1+': Shape(C = 256, M = 256, P = 56, Q = 56, R = 3, S = 3, Pstride = 2, Qstride = 2, Rdilation = 3, Sdilation = 3), # 2D dilation experiment
    'L3+': Shape(C = 128, M = 128, P = 112, Q = 112, R = 9, S = 9, Pstride = 1, Qstride = 4, Rdilation = 1, Sdilation = 3) # 1D dilation experiment
}
"""
Convolutions chosen as benchmark for the tool.
"""
# Shapes follow conv_coupling_with_stride (P/Q are output spatial dims).
# Entries are drawn from real networks (source noted per group), plus a
# final group stressing non-unit strides and dilations.
benchmark_convs = {
    # VGG16
    'I': Shape(C = 128, M = 256, P = 56, Q = 56, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'II': Shape(C = 512, M = 512, P = 28, Q = 28, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    # ResNet18 and 50
    'III': Shape(C = 3, M = 64, P = 112, Q = 112, R = 7, S = 7, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1),
    'IV': Shape(C = 64, M = 64, P = 56, Q = 56, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'V': Shape(C = 128, M = 128, P = 28, Q = 28, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'VI': Shape(C = 256, M = 256, P = 14, Q = 14, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'VII': Shape(C = 256, M = 512, P = 7, Q = 7, R = 3, S = 3, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1),
    'VIII': Shape(C = 64, M = 256, P = 56, Q = 56, R = 1, S = 1, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1), # point-wise
    # MobileNetV3
    'IX': Shape(C = 3, M = 96, P = 176, Q = 176, R = 3, S = 3, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1),
    'X': Shape(C = 72, M = 72, P = 28, Q = 28, R = 3, S = 3, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1),
    'XI': Shape(C = 576, M = 576, P = 7, Q = 7, R = 5, S = 5, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    'XII': Shape(C = 24, M = 88, P = 28, Q = 28, R = 1, S = 1, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1), # point-wise
    # Strides and Dilation
    'XIII': Shape(C = 16, M = 16, P = 224, Q = 224, R = 3, S = 3, Pstride = 3, Qstride = 3, Rdilation = 4, Sdilation = 4),
    'XIV': Shape(C = 128, M = 128, P = 112, Q = 112, R = 9, S = 9, Pstride = 4, Qstride = 4, Rdilation = 3, Sdilation = 3),
    'XV': Shape(C = 256, M = 256, P = 56, Q = 56, R = 3, S = 3, Pstride = 2, Qstride = 2, Rdilation = 3, Sdilation = 3)
}
# Benchmark transposed ("de-")convolution layers; pair with
# transposed_conv_coupling, where P/Q index the INPUT spatial dims.
benchmark_convs_transposed = dict(
    # Transposed convs
    XVI = Shape(C = 128, M = 256, P = 32, Q = 32, R = 4, S = 4, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    XVII = Shape(C = 576, M = 576, P = 7, Q = 7, R = 5, S = 5, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
)
# Benchmark batched convolutions (N = batch size); pair with
# conv_coupling_with_stride_and_batches.
benchmark_convs_batched = dict(
    # Batched convs
    XVIII = Shape(N = 64, C = 256, M = 256, P = 14, Q = 14, R = 3, S = 3, Pstride = 1, Qstride = 1, Rdilation = 1, Sdilation = 1),
    XIX = Shape(N = 128, C = 72, M = 72, P = 28, Q = 28, R = 3, S = 3, Pstride = 2, Qstride = 2, Rdilation = 1, Sdilation = 1),
    XX = Shape(N = 32, C = 256, M = 256, P = 56, Q = 56, R = 5, S = 5, Pstride = 2, Qstride = 2, Rdilation = 3, Sdilation = 3),
)