-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsettings.py
More file actions
178 lines (167 loc) · 12.9 KB
/
settings.py
File metadata and controls
178 lines (167 loc) · 12.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
class MetaSettings(type):
    """
    Metaclass for static, class-level settings containers.

    Classes built with this metaclass cannot be instantiated, and every assignment to one of
    their attributes that changes its value is reported on stdout (unless silenced through the
    class's own VERBOSE attribute).
    """

    def __call__(cls, *args, **kwargs):
        """Always raise TypeError: classes using this metaclass are meant to be used directly."""
        raise TypeError(f"Instances of {cls.__name__} cannot be created. Use the class directly.")

    def __setattr__(cls, name, value):
        """
        Assign 'value' to the class attribute 'name', printing an INFO message when the value
        actually changes.

        The message is suppressed when the class has a falsy VERBOSE attribute, and it is never
        printed for assignments to VERBOSE itself. A class without VERBOSE is treated as verbose.
        """
        # The comparison is done first (a missing attribute compares as None), so that nothing
        # else is looked up when the assignment does not change the stored value.
        value_changes = getattr(cls, name, None) != value
        if value_changes:
            logging_enabled = getattr(cls, "VERBOSE", True)
            if logging_enabled and name != "VERBOSE":
                print(f"INFO: forcefully updating setting {name} to {value}")
        super().__setattr__(name, value)
class Settings(metaclass = MetaSettings):
    """
    Static container for all the configuration settings, grouped in mapper, model, and engine
    settings. It cannot be instantiated (see its metaclass): read and assign the attributes
    directly on the class, e.g. 'Settings.VERBOSE = False'.
    """

    # If True, enables logging of the MSE process. Note that such prints occur during the timed
    # section of the program, set to False for accurate timing results.
    VERBOSE = True

    # MAPPER SETTINGS:

    # If False, FF only searches for better solutions at a one-factor distance from the current one,
    # if True, FF searches for solutions at a distance of multiple factors, albeit only arity is
    # varied, with the tried factor being just one. (tries different multiplicities)
    ITERATE_AMOUNTS = False
    # Same as ITERATE_AMOUNTS, but enables the feature only when optimizing exclusively spatial levels.
    SPATIAL_ITERATE_AMOUNTS = ITERATE_AMOUNTS or False
    # If True, factors allocated on spatial levels will not be optimized (as if they were constraints),
    # and this is done after factor allocation on spatial fanouts is maximized.
    # NOTE: automatically set to False in case of 2 dimensions on the same fanout.
    FREEZE_SPATIALS = True
    # Number of one-factor steps to try during local search, after which the best choice is picked.
    # NOTE: automatically raised to (at least) 2 in case of 2 dimensions on the same fanout.
    STEPS_TO_EXPLORE = 1
    # Number of one-factor steps to try during local search, after which the best choice is picked.
    # NOTE: for the mappers using this, it applies during the spatial fanouts local search step.
    # NOTE: automatically raised to (at least) the maximum number of distinct prime factors on a fanout.
    SPATIAL_STEPS_TO_EXPLORE = 1
    # Number of one-factor steps to try during local search, after which the best choice is picked.
    # NOTE: for the mappers using this, it applies during the co-optimization local search step.
    # NOTE: automatically raised to (at least) 2 in case of 2 dimensions on the same fanout.
    CO_OPT_STEPS_TO_EXPLORE = 1
    # Initial number of one-factor steps to try during local search, where it progressively increases
    # up to STEPS_TO_EXPLORE or CO_OPT_STEPS_TO_EXPLORE depending on the step.
    INITIAL_STEPS_TO_EXPLORE = 1
    # If True, any recursively explored step after the first one, will only attempt to move factors
    # into the destination level which was the source for the previous move.
    # NOTE: automatically set to True in case of 2 dimensions on the same fanout.
    LIMIT_NEXT_STEP_DST_TO_CURRENT_SRC = False
    # Same as LIMIT_NEXT_STEP_DST_TO_CURRENT_SRC, but applies only when optimizing spatial levels.
    SPATIAL_LIMIT_NEXT_STEP_DST_TO_CURRENT_SRC = False
    # Same as LIMIT_NEXT_STEP_DST_TO_CURRENT_SRC, but applies only when co-optimizing levels.
    CO_OPT_LIMIT_NEXT_STEP_DST_TO_CURRENT_SRC = False
    # If True, any intermediate step of a multi-step exploration will not need to satisfy architectural
    # constraints, on the condition that the final step will satisfy them.
    # NOTE: can be True iff LIMIT_NEXT_STEP_DST_TO_CURRENT_SRC is True
    # NOTE: automatically set to True in case of 2 dimensions on the same fanout.
    NO_CONSTRAINTS_CHECK_DURING_MULTISTEP = LIMIT_NEXT_STEP_DST_TO_CURRENT_SRC and False
    # If True, in case of 2 dimensions on the same fanout only the FIRST dimension gets factors
    # allocation during fanout maximization. If False, both dimensions equally get factors.
    # NOTE: when this is True, optimizeDataflows also iterates over which dimension is maximized,
    #       in other words also fanout dimensions are permuted to pick the one to maximize.
    #       [Dimensions with a constraint are not iterated over]
    # >>> Play with this in case of 2 dimensions on the same fanout!!!
    # >>> Setting this to True costs Nx time, where N is the number of rotations of fanout dimensions.
    # >>> Hence, usage is suggested when MULTITHREADED is True.
    ONLY_MAXIMIZE_ONE_FANOUT_DIM = True
    # If True, fanout maximization is replaced with an exploration of spatial fanout levels in three
    # steps, together with memory levels, within factorFlow's local search.
    LOCAL_SEARCH_SPATIAL_LEVELS = False
    # When optimizing spatial levels (and memories are frozen), the Wart is computed with this number
    # as the exponent on the utilization ratio at the denominator, penalizing sub-utilized mappings.
    # NOTE: raise this if spatial optimization is underperforming, lower it if it's too greedy.
    UTIL_EXP_IN_SEARCH_SPATIAL_LEVELS = 10
    # If True, when memory and spatial levels are being co-optimized, if the source level for a move is
    # a spatial level, at least one more move is always explored to try to replenish spatial utilization.
    ONE_MORE_CO_OPT_STEP_IF_SRC_IS_SPATIAL = False
    # If True, saves time by assuming that any permutation differing from an optimal one by the order
    # of dimensions involving one with a single iteration can be optimized starting from where it
    # already is, and thus avoids a complete re-initialization.
    # NOTE: True is highly recommended to avoid prohibitive MSE times.
    PERM_SKIP = True
    # If True, saves time by assuming that any permutation differing from an optimal one by the order
    # of dimensions involving one with a single iteration cannot be optimized further, thus skips
    # it entirely. Setting it to False can slightly improve the best found mapping.
    # NOTE: unless PERM_SKIP is True, this setting is useless.
    HARD_PERM_SKIP = False
    # If True, the list of permutations to explore on each level is filtered to keep a single permutation
    # for each set of permutations that could reach the same reuse patterns (hence, those having the same
    # innermost iterated dimension coupled to each operand and the same dimensions above and below it)
    DISTINCT_REUSE_OPPORTUNITIES = True
    # If True, rules are built based on the best selected permutations throughout the exploration,
    # and these rules are subsequently enforced to prune the remaining permutations, speeding up
    # the exploration under the assumption that there is some consistency between optimal choices.
    PERM_PRUNING = False
    # When PERM_PRUNING is True, the following 3 settings determine the number of times a dimension
    # needs to (1) have a single iteration, (2) be in a certain relative order with another, and
    # (3) occupy a specific position in the ordering of the best mapping found for a subset of
    # permutations, before such occurrence becomes a rule that is enforced to prune later permutations.
    DIM_AT_1_COUNT_BEFORE_LOCK = 2
    RELATIVE_ORDER_COUNT_BEFORE_LOCK = 2
    POSITIONAL_COUNT_BEFORE_LOCK = 2
    # Determines the number of times the exploration of permutations repeats, restarting from the
    # outermost level after reaching the innermost one. A value >1 is meant to "refine" previous choices.
    RIPPLES = 1

    # MODEL SETTINGS:

    # If True, the Wart will be multiplied by the utilization of the fanouts in the spatial architecture,
    # punishing mappings which underutilize fanouts.
    UTILIZATION_IN_WART = True
    # If True, drain reads will be assumed at zero energy cost for all levels, this is equivalent to
    # assuming that the last write bypasses the target level, going upward, and directly writes in the
    # above level, thus negating the need for a read to drain. As a result, drains also don't contribute
    # to the final latency and level stalls anymore.
    # NOTE: setting this to True is required to match Timeloop's results exactly.
    FREE_DRAINS = False
    # If True, GEMM dimensions might get padded to reach the least larger-than-current size which can
    # be allocated to the entirety of a fanout's instances.
    # This is performed as part of the fanout maximization.
    # NOTE: in the 'exponential' mapper, this is useless unless ONLY_MAXIMIZE_ONE_FANOUT_DIM is True.
    PADDED_MAPPINGS = False
    # If True, the 'distinct_values' method in 'utils.py' uses an approximated formula to compute the
    # distinct values which can be produced by a linear combination like 'x_const*x+y_const*y'.
    # The resulting count, and the following reads/writes counts might thus be overestimated.
    # When this is False, the computation of 'distinct_values' MAY (depends on strides) become slower.
    # NOTE: setting this to True is needed to match Timeloop, as it uses the same approximation.
    OVERESTIMATE_DISTINCT_VALUES = False
    # Path to the folder above Accelergy, for a normal installation in Ubuntu that is usually like:
    # "/home/<username>/.local/lib/python3.X/site-packages/"
    # FactorFlow has been tested with commit 'd1d199e571e621ce11168efe1af2583dec0c2c49' of Accelergy.
    # NOTE: this is NOT required if you have installed Accelergy as a python package and can import it.
    ACCELERGY_PATH = "\\\\wsl.localhost/Ubuntu-22.04/home/zero/.local/lib/python3.10/site-packages"

    # ENGINE SETTINGS:

    # The mapper to import as part of the map-space exploration engine. Alternative mappers can
    # be found in the folder "./mappers", use the name of the python file for this setting.
    MAPPER = "breadthfirst_local"
    # Which settings apply to which mapper:
    # Setting                                   | exponential | quadratic | linear | hybrid | local |
    # ITERATE_AMOUNTS                           |      o      |     o     |   o    |   o    |   o   |
    # SPATIAL_ITERATE_AMOUNTS                   |      x      |     x     |   x    |   o    |   o   |
    # FREEZE_SPATIALS                           |      o      |     x     |   x    |   x    |   x   |
    # STEPS_TO_EXPLORE                          |      o      |     o     |   o    |   o    |   o   |
    # SPATIAL_STEPS_TO_EXPLORE                  |      x      |     x     |   x    |   o    |   o   |
    # CO_OPT_STEPS_TO_EXPLORE                   |      x      |     x     |   x    |   o    |   o   |
    # INITIAL_STEPS_TO_EXPLORE                  |      x      |     x     |   x    |   o    |   o   |
    # LIMIT_NEXT_STEP_DST_TO_CURRENT_SRC        |      o      |     o     |   o    |   o    |   o   |
    # CO_OPT_LIMIT_NEXT_STEP_DST_TO_CURRENT_SRC |      x      |     x     |   x    |   x    |   o   |
    # NO_CONSTRAINTS_CHECK_DURING_MULTISTEP     |      o      |     o     |   o    |   o    |   o   |
    # ONLY_MAXIMIZE_ONE_FANOUT_DIM              |      o      |     x     |   x    |   x    |   x   |
    # LOCAL_SEARCH_SPATIAL_LEVELS               |      x      |     o     |   o    |   o    |   o   |
    # UTIL_EXP_IN_SEARCH_SPATIAL_LEVELS         |      x      |     x     |   x    |   x    |   o   |
    # ONE_MORE_CO_OPT_STEP_IF_SRC_IS_SPATIAL    |      x      |     x     |   x    |   x    |   o   |
    # PERM_SKIP                                 |      o      |     o     |   o    |   o    |   x   |
    # HARD_PERM_SKIP                            |      o      |     o     |   o    |   o    |   x   |
    # DISTINCT_REUSE_OPPORTUNITIES              |      o      |     o     |   o    |   o    |   x   |
    # PERM_PRUNING                              |      o      |     x     |   x    |   o    |   x   |
    # RIPPLES                                   |      x      |     x     |   o    |   x    |   x   |
    # If True, the exploration of permutations done in optimizeDataflows will run across multiple
    # threads (or better, processes, due to the GIL).
    MULTITHREADED = True
    # Number of threads to use if MULTITHREADED is True. If None, it is set to the number of
    # logical CPUs available on the system.
    THREADS_COUNT = 8
    # Timeout for all blocking synchronization methods (e.g. join, wait).
    TIMEOUT = 0.001

    # flag used to propagate a ctrl+c to all threads
    forced_termination_flag = False

    @classmethod
    def toString(cls) -> str:
        """
        Returns a one-line summary of the settings, formatted as "Settings(NAME=value, ...)".

        Only the attributes defined directly on the class this is called on are listed (those
        returned by 'vars'), skipping dunder names and callables. Entries follow definition order.
        When there is nothing to list, the result is "Settings()".
        """
        entries = [
            f"{k}={v}"
            for k, v in vars(cls).items()
            if not k.startswith("__") and not callable(getattr(cls, k))
        ]
        # Joining (rather than appending ", " and trimming the last two characters) keeps the
        # output well-formed even when no entry is present.
        return "Settings(" + ", ".join(entries) + ")"