Skip to content

Commit 7562a6b

Browse files
authored
[NFC] Add a lazy mode to LocalGraph (WebAssembly#6895)
LocalGraph by default will compute all the local.sets that can be read from all local.gets. However, many passes only query a small number of those. To avoid wasted work, add a lazy mode that only computes sets when asked about a get. This is then used in a single place, LoopInvariantCodeMotion, which becomes 18% faster.
1 parent ad6a124 commit 7562a6b

File tree

4 files changed

+293
-59
lines changed

4 files changed

+293
-59
lines changed

src/ir/LocalGraph.cpp

Lines changed: 135 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,8 @@ struct Info {
4141

4242
// flow helper class. flows the gets to their sets
4343

44-
struct LocalGraph::LocalGraphFlower
45-
: public CFGWalker<LocalGraph::LocalGraphFlower,
46-
Visitor<LocalGraph::LocalGraphFlower>,
47-
Info> {
44+
struct LocalGraphFlower
45+
: public CFGWalker<LocalGraphFlower, Visitor<LocalGraphFlower>, Info> {
4846
LocalGraph::GetSetsMap& getSetsMap;
4947
LocalGraph::Locations& locations;
5048
Function* func;
@@ -90,10 +88,6 @@ struct LocalGraph::LocalGraphFlower
9088
self->locations[curr] = currp;
9189
}
9290

93-
// The below class-level items (currentIteration, FlowBlock, etc.) would more
94-
// properly belong inside flow(), as they are only needed there, but flow() is
95-
// split up into two parts in service of a future user of only part of flow().
96-
9791
// Each time we flow a get (or set of gets) to find its sets, we mark a
9892
// different iteration number. This lets us memoize the current iteration on
9993
// blocks as we pass them, allowing us to quickly skip them in that iteration
@@ -180,7 +174,7 @@ struct LocalGraph::LocalGraphFlower
180174
assert(entryFlowBlock != nullptr);
181175
}
182176

183-
// Flow all the data.
177+
// Flow all the data. This is done in eager (i.e., non-lazy) mode.
184178
void flow() {
185179
prepareFlowBlocks();
186180

@@ -301,22 +295,111 @@ struct LocalGraph::LocalGraphFlower
301295
// Bump the current iteration for the next time we are called.
302296
currentIteration++;
303297
}
298+
299+
// When the LocalGraph is in lazy mode we do not compute all of getSetsMap
300+
// initially, but instead fill in these data structures that let us do so
301+
// later for individual gets. Specifically we need to find the location of a
302+
// local.get in the CFG.
303+
struct BlockLocation {
304+
// The basic block an item is in.
305+
FlowBlock* block = nullptr;
306+
// The index in that block that the item is at.
307+
Index index;
308+
};
309+
std::unordered_map<LocalGet*, BlockLocation> getLocations;
310+
311+
// Set up getLocations using the flow blocks, so that we are ready to handle
312+
// later lazy requests for the sets of particular gets. This is done in lazy
313+
// mode.
314+
void prepareLaziness() {
315+
prepareFlowBlocks();
316+
317+
for (auto& block : flowBlocks) {
318+
const auto& actions = block.actions;
319+
for (Index i = 0; i < actions.size(); i++) {
320+
if (auto* get = actions[i]->dynCast<LocalGet>()) {
321+
getLocations[get] = BlockLocation{&block, i};
322+
}
323+
}
324+
}
325+
}
326+
327+
// Flow a specific get. This is done in lazy mode.
328+
void flowGet(LocalGet* get) {
329+
auto index = get->index;
330+
331+
// Regardless of what we do below, ensure an entry for this get, so that we
332+
// know we computed it.
333+
auto& sets = getSetsMap[get];
334+
335+
auto [block, blockIndex] = getLocations[get];
336+
if (!block) {
337+
// We did not find location info for this get, which means it is
338+
// unreachable.
339+
return;
340+
}
341+
342+
// We must have the get at that location.
343+
assert(blockIndex < block->actions.size());
344+
assert(block->actions[blockIndex] == get);
345+
346+
if (!hasSet[index]) {
347+
// As in flow(), when there is no local.set for an index we can just mark
348+
// the default value as the only writer.
349+
sets.insert(nullptr);
350+
return;
351+
}
352+
353+
// Go backwards in this flow block, from the get. If we see other gets that
354+
// have not been computed then we can accumulate them as well, as the
355+
// results we compute apply to them too.
356+
std::vector<LocalGet*> gets = {get};
357+
while (blockIndex > 0) {
358+
blockIndex--;
359+
auto* curr = block->actions[blockIndex];
360+
if (auto* otherGet = curr->dynCast<LocalGet>()) {
361+
if (otherGet->index == index) {
362+
// This is another get of the same index. If we've already computed
363+
// it, then we can just use that, as they must have the same sets.
364+
auto iter = getSetsMap.find(otherGet);
365+
if (iter != getSetsMap.end()) {
366+
auto& otherSets = iter->second;
367+
for (auto* get : gets) {
368+
getSetsMap[get] = otherSets;
369+
}
370+
return;
371+
}
372+
373+
// This is a get of the same index, but which has not been computed.
374+
// It will have the same sets as us.
375+
gets.push_back(otherGet);
376+
}
377+
} else {
378+
// This is a set.
379+
auto* set = curr->cast<LocalSet>();
380+
if (set->index == index) {
381+
// This is the only set writing to our gets.
382+
for (auto* get : gets) {
383+
getSetsMap[get].insert(set);
384+
}
385+
return;
386+
}
387+
}
388+
}
389+
390+
// We must do an inter-block flow.
391+
flowBackFromStartOfBlock(block, index, gets);
392+
}
304393
};
305394

306395
// LocalGraph implementation
307396

308-
LocalGraph::LocalGraph(Function* func, Module* module) : func(func) {
397+
LocalGraph::LocalGraph(Function* func, Module* module)
398+
: LocalGraphBase(func, module) {
309399
// See comment on the declaration of this field for why we use a raw
310-
// allocation. Note that since we just call flow() and delete it, this is not
311-
// really needed, but it sets the stage for a later PR that will do other work
312-
// here (related to the splitting up of flow() that is mentioned earlier).
313-
flower =
314-
std::make_unique<LocalGraphFlower>(getSetsMap, locations, func, module);
315-
316-
flower->flow();
317-
318-
// We will never use it again.
319-
flower.reset();
400+
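// allocation. NOTE(review): this comment looks stale here, since the flower below is a stack-allocated local rather than a field or a raw allocation.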
401+
LocalGraphFlower flower(getSetsMap, locations, func, module);
402+
flower.flow();
320403

321404
#ifdef LOCAL_GRAPH_DEBUG
322405
std::cout << "LocalGraph::dump\n";
@@ -330,13 +413,6 @@ LocalGraph::LocalGraph(Function* func, Module* module) : func(func) {
330413
#endif
331414
}
332415

333-
LocalGraph::~LocalGraph() {
334-
// We must declare a destructor here in the cpp file, even though it is empty
335-
// and pointless, due to some C++ issue with our having a unique_ptr to a
336-
// forward-declared class (LocalGraphFlower).
337-
// https://stackoverflow.com/questions/13414652/forward-declaration-with-unique-ptr#comment110005453_13414884
338-
}
339-
340416
bool LocalGraph::equivalent(LocalGet* a, LocalGet* b) {
341417
auto& aSets = getSets(a);
342418
auto& bSets = getSets(b);
@@ -421,4 +497,36 @@ void LocalGraph::computeSSAIndexes() {
421497

422498
bool LocalGraph::isSSA(Index x) { return SSAIndexes.count(x); }
423499

500+
// LazyLocalGraph
501+
502+
LazyLocalGraph::LazyLocalGraph(Function* func, Module* module)
503+
: LocalGraphBase(func, module) {
504+
flower =
505+
std::make_unique<LocalGraphFlower>(getSetsMap, locations, func, module);
506+
507+
flower->prepareLaziness();
508+
509+
#ifdef LOCAL_GRAPH_DEBUG
510+
std::cout << "LazyLocalGraph::dump\n";
511+
for (auto& [get, sets] : getSetsMap) {
512+
std::cout << "GET\n" << get << " is influenced by\n";
513+
for (auto* set : sets) {
514+
std::cout << set << '\n';
515+
}
516+
}
517+
std::cout << "total locations: " << locations.size() << '\n';
518+
#endif
519+
}
520+
521+
LazyLocalGraph::~LazyLocalGraph() {
522+
// We must declare a destructor here in the cpp file, even though it is empty
523+
// and pointless, due to some C++ issue with our having a unique_ptr to a
524+
// forward-declared class (LocalGraphFlower).
525+
// https://stackoverflow.com/questions/13414652/forward-declaration-with-unique-ptr#comment110005453_13414884
526+
}
527+
528+
void LazyLocalGraph::computeGetSets(LocalGet* get) const {
529+
flower->flowGet(get);
530+
}
531+
424532
} // namespace wasm

src/ir/local-graph.h

Lines changed: 72 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,56 @@ namespace wasm {
3838
// debugging etc.; and it has no downside for optimization, since unreachable
3939
// code will be removed anyhow).
4040
//
41-
struct LocalGraph {
41+
// There are two options here, the normal LocalGraph which is eager and computes
42+
// everything up front, which is faster if most things end up needed, and a lazy
43+
// one which computes on demand, which can be much faster if we only need a
44+
// small subset of queries.
45+
//
46+
47+
// Base class for both LocalGraph and LazyLocalGraph (not meant for direct use).
48+
struct LocalGraphBase {
49+
protected:
4250
// If a module is passed in, it is used to find which features are needed in
4351
// the computation (for example, if exception handling is disabled, then we
4452
// can generate a simpler CFG, as calls cannot throw).
53+
LocalGraphBase(Function* func, Module* module = nullptr)
54+
: func(func), module(module) {}
55+
56+
public:
57+
// A set of sets, returned from the query about which sets can be read from a
58+
// get. Typically only one or two apply there, so this is a small set.
59+
using Sets = SmallSet<LocalSet*, 2>;
60+
61+
// Where each get and set is. We compute this while doing the main computation
62+
// and make it accessible for users, for easy replacing of things without
63+
// extra work.
64+
using Locations = std::map<Expression*, Expression**>;
65+
Locations locations;
66+
67+
// Sets of gets or sets that are influenced, as returned from get*Influences().
68+
using SetInfluences = std::unordered_set<LocalGet*>;
69+
using GetInfluences = std::unordered_set<LocalSet*>;
70+
71+
// Defined publicly as other utilities need similar data layouts.
72+
using GetSetsMap = std::unordered_map<LocalGet*, Sets>;
73+
74+
protected:
75+
Function* func;
76+
Module* module;
77+
78+
std::set<Index> SSAIndexes;
79+
80+
// A map of each get to the sets relevant to it. This is mutable so that
81+
// getSets() can be const in LazyLocalGraph (which does memoization, see
82+
// below).
83+
mutable GetSetsMap getSetsMap;
84+
85+
std::unordered_map<LocalSet*, SetInfluences> setInfluences;
86+
std::unordered_map<LocalGet*, GetInfluences> getInfluences;
87+
};
88+
89+
struct LocalGraph : public LocalGraphBase {
4590
LocalGraph(Function* func, Module* module = nullptr);
46-
~LocalGraph();
4791

4892
// Get the sets relevant for a local.get.
4993
//
@@ -52,23 +96,19 @@ struct LocalGraph {
5296
// for a param.
5397
//
5498
// Often there is a single set, or a phi or two items, so we use a small set.
55-
using Sets = SmallSet<LocalSet*, 2>;
5699
const Sets& getSets(LocalGet* get) const {
57100
auto iter = getSetsMap.find(get);
58101
if (iter == getSetsMap.end()) {
102+
// A missing entry means there is nothing there (and we saved a little
103+
// space by not putting something there).
104+
//
59105
// Use a canonical constant empty set to avoid allocation.
60106
static const Sets empty;
61107
return empty;
62108
}
63109
return iter->second;
64110
}
65111

66-
// Where each get and set is. We compute this while doing the main computation
67-
// and make it accessible for users, for easy replacing of things without
68-
// extra work.
69-
using Locations = std::map<Expression*, Expression**>;
70-
Locations locations;
71-
72112
// Checks if two gets are equivalent, that is, definitely have the same
73113
// value.
74114
bool equivalent(LocalGet* a, LocalGet* b);
@@ -84,9 +124,6 @@ struct LocalGraph {
84124
computeGetInfluences();
85125
}
86126

87-
using SetInfluences = std::unordered_set<LocalGet*>;
88-
using GetInfluences = std::unordered_set<LocalSet*>;
89-
90127
const SetInfluences& getSetInfluences(LocalSet* set) const {
91128
auto iter = setInfluences.find(set);
92129
if (iter == setInfluences.end()) {
@@ -130,28 +167,34 @@ struct LocalGraph {
130167
void computeSSAIndexes();
131168

132169
bool isSSA(Index x);
170+
};
133171

134-
// Defined publicly as other utilities need similar data layouts.
135-
using GetSetsMap = std::unordered_map<LocalGet*, Sets>;
172+
// The internal implementation of the flow analysis used to compute things. This
173+
// must be declared in the header so that LazyLocalGraph can declare a unique
174+
// ptr to it, below.
175+
struct LocalGraphFlower;
136176

137-
private:
138-
Function* func;
139-
std::set<Index> SSAIndexes;
177+
struct LazyLocalGraph : public LocalGraphBase {
178+
LazyLocalGraph(Function* func, Module* module = nullptr);
179+
~LazyLocalGraph();
140180

141-
// A map of each get to the sets relevant to it. This is mutable so that
142-
// getSets() can be const.
143-
mutable GetSetsMap getSetsMap;
181+
const Sets& getSets(LocalGet* get) const {
182+
auto iter = getSetsMap.find(get);
183+
if (iter == getSetsMap.end()) {
184+
// A missing entry means we did not do the computation yet. Do it now.
185+
computeGetSets(get);
186+
iter = getSetsMap.find(get);
187+
assert(iter != getSetsMap.end());
188+
}
189+
return iter->second;
190+
}
144191

145-
// The internal implementation of the flow analysis used to compute
146-
// getSetsMap.
147-
struct LocalGraphFlower;
148-
// This could be a unique_ptr, but the forward declaration is not compatible
149-
// with that. It could alternatively be a shared_ptr, but that runs into what
150-
// seems to be a false positive of clang's (but not gcc's) UBSan.
151-
std::unique_ptr<LocalGraphFlower> flower;
192+
private:
193+
// Compute the sets for a get and store them on getSetsMap.
194+
void computeGetSets(LocalGet* get) const;
152195

153-
std::unordered_map<LocalSet*, SetInfluences> setInfluences;
154-
std::unordered_map<LocalGet*, GetInfluences> getInfluences;
196+
// This remains alive as long as we are, so that we can compute things lazily.
197+
std::unique_ptr<LocalGraphFlower> flower;
155198
};
156199

157200
} // namespace wasm

src/passes/LoopInvariantCodeMotion.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,12 @@ struct LoopInvariantCodeMotion
4545

4646
// main entry point
4747

48-
LocalGraph* localGraph;
48+
LazyLocalGraph* localGraph;
4949

5050
void doWalkFunction(Function* func) {
51-
// Compute all local dependencies first.
52-
LocalGraph localGraphInstance(func, getModule());
51+
// Prepare to compute the local dependencies we care about. We may only need
52+
// very few, so use a lazy LocalGraph.
53+
LazyLocalGraph localGraphInstance(func, getModule());
5354
localGraph = &localGraphInstance;
5455
// Traverse the function.
5556
super::doWalkFunction(func);

0 commit comments

Comments
 (0)