From b31d4ef8211265dde62e10a8dc0926f442347198 Mon Sep 17 00:00:00 2001 From: pcineverdies Date: Fri, 22 Aug 2025 17:52:09 +0200 Subject: [PATCH] [Experimental][SQ] Added "Straight to the queue" pass implementation --- .../Transforms/HandshakeStraightToQueue.h | 34 ++ .../include/experimental/Transforms/Passes.h | 1 + .../include/experimental/Transforms/Passes.td | 10 + experimental/lib/Transforms/CMakeLists.txt | 1 + .../Transforms/HandshakeStraightToQueue.cpp | 528 ++++++++++++++++++ tools/dynamatic/dynamatic.cpp | 7 +- tools/dynamatic/scripts/compile.sh | 44 +- 7 files changed, 616 insertions(+), 9 deletions(-) create mode 100644 experimental/include/experimental/Transforms/HandshakeStraightToQueue.h create mode 100644 experimental/lib/Transforms/HandshakeStraightToQueue.cpp diff --git a/experimental/include/experimental/Transforms/HandshakeStraightToQueue.h b/experimental/include/experimental/Transforms/HandshakeStraightToQueue.h new file mode 100644 index 0000000000..9b88a942d3 --- /dev/null +++ b/experimental/include/experimental/Transforms/HandshakeStraightToQueue.h @@ -0,0 +1,34 @@ +//===- HandshakeStraightToQueue.h - Implement S2Q algorithm --*- C++ -*----===// +// +// Dynamatic is under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the pass which allows to implement straight to the queue, +// a different way of allocating basic blocks in the LSQ, based on an ASAP +// approach rather than relying on the network of cmerges. 
+// +//===----------------------------------------------------------------------===// + +#ifndef DYNAMATIC_TRANSFORMS_HANDSHAKESTRAIGHTTOQUEUE_H +#define DYNAMATIC_TRANSFORMS_HANDSHAKESTRAIGHTTOQUEUE_H +#include "dynamatic/Support/DynamaticPass.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" + +namespace dynamatic { +namespace experimental { +namespace ftd { + +std::unique_ptr createStraightToQueue(); + +#define GEN_PASS_DECL_HANDSHAKESTRAIGHTTOQUEUE +#define GEN_PASS_DEF_HANDSHAKESTRAIGHTTOQUEUE +#include "experimental/Transforms/Passes.h.inc" + +} // namespace ftd +} // namespace experimental +} // namespace dynamatic +#endif // DYNAMATIC_TRANSFORMS_HANDSHAKESTRAIGHTTOQUEUE_H diff --git a/experimental/include/experimental/Transforms/Passes.h b/experimental/include/experimental/Transforms/Passes.h index 310af376d7..ec76104daf 100644 --- a/experimental/include/experimental/Transforms/Passes.h +++ b/experimental/include/experimental/Transforms/Passes.h @@ -17,6 +17,7 @@ #include "dynamatic/Support/LLVM.h" #include "experimental/Transforms/HandshakeCombineSteeringLogic.h" #include "experimental/Transforms/HandshakePlaceBuffersCustom.h" +#include "experimental/Transforms/HandshakeStraightToQueue.h" #include "experimental/Transforms/LSQSizing/HandshakeSizeLSQs.h" #include "experimental/Transforms/ResourceSharing/Crush.h" #include "experimental/Transforms/Rigidification/HandshakeRigidification.h" diff --git a/experimental/include/experimental/Transforms/Passes.td b/experimental/include/experimental/Transforms/Passes.td index 84a436821c..593c116d7d 100644 --- a/experimental/include/experimental/Transforms/Passes.td +++ b/experimental/include/experimental/Transforms/Passes.td @@ -26,6 +26,16 @@ def HandshakeCombineSteeringLogic : DynamaticPass< "handshake-combine-steering-l }]; let constructor = "dynamatic::experimental::ftd::combineSteeringLogic()"; } +def HandshakeStraightToQueue : DynamaticPass< 
"handshake-straight-to-queue"> { + let summary = "Use the FPGA'23 technique to allocate the basic blocks in the LSQs"; + let description = [{ + "Straight to the queue" allows to allocate the basic blocks in the LSQ + according to an ASAP approach rather than by following the control flow + relationships coming from the network of cmerges. + }]; + let constructor = "dynamatic::experimental::ftd::createStraightToQueue()"; + let dependentDialects = ["mlir::cf::ControlFlowDialect", "mlir::func::FuncDialect"]; +} def HandshakeSpeculation : DynamaticPass<"handshake-speculation"> { let summary = "Place Speculation operations"; diff --git a/experimental/lib/Transforms/CMakeLists.txt b/experimental/lib/Transforms/CMakeLists.txt index 7e0e87b561..2c24caeada 100644 --- a/experimental/lib/Transforms/CMakeLists.txt +++ b/experimental/lib/Transforms/CMakeLists.txt @@ -1,6 +1,7 @@ add_dynamatic_library(DynamaticExperimentalTransforms HandshakePlaceBuffersCustom.cpp HandshakeCombineSteeringLogic.cpp + HandshakeStraightToQueue.cpp DEPENDS DynamaticExperimentalTransformsPassIncGen diff --git a/experimental/lib/Transforms/HandshakeStraightToQueue.cpp b/experimental/lib/Transforms/HandshakeStraightToQueue.cpp new file mode 100644 index 0000000000..3f3df41b1d --- /dev/null +++ b/experimental/lib/Transforms/HandshakeStraightToQueue.cpp @@ -0,0 +1,528 @@ +//===- HandshakeStraightToQueue.cpp - Implement S2Q algorithm -*- C++ -*---===// +// +// Dynamatic is under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the pass which allows to implement straight to the +// queue, a different way of allocating basic blocks in the LSQ, based on an +// ASAP approach rather than relying on the network of cmerges. 
+// +//===----------------------------------------------------------------------===// + +#include "experimental/Transforms/HandshakeStraightToQueue.h" +#include "dynamatic/Dialect/Handshake/HandshakeOps.h" +#include "dynamatic/Dialect/Handshake/HandshakeTypes.h" +#include "experimental/Support/CFGAnnotation.h" +#include "experimental/Support/FtdImplementation.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" +#include + +using namespace mlir; +using namespace dynamatic; +using namespace dynamatic::experimental; + +namespace { + +struct ProdConsMemDep { + Block *prodBb; + Block *consBb; + + ProdConsMemDep(Block *prod, Block *cons) : prodBb(prod), consBb(cons) {} + /// Print the producer block and the consumer block of this dependency to llvm::dbgs() + void print(); +}; + +/// A group represents all the memory operations belonging to the same basic +/// block which require the same LSQ. It contains a reference to the BB, a set +/// of predecessors in the dependence graph and a set of successors.
+struct MemoryGroup { + // The basic block this group stands for + Block *bb; + + // Set of predecessor groups of this group + DenseSet preds; + // Set of successor groups of this group + DenseSet succs; + + // Build a group for block `b` with no predecessors and no successors + MemoryGroup(Block *b) : bb(b) {} + + /// Print the block of the current group, its predecessors and its successors to llvm::dbgs() + void print(); +}; + +void ProdConsMemDep::print() { + llvm::dbgs() << "[PROD_CONS_MEM_DEP] Dependency from ["; + prodBb->printAsOperand(llvm::dbgs()); + llvm::dbgs() << "] to ["; + consBb->printAsOperand(llvm::dbgs()); + llvm::dbgs() << "]\n"; +} + +void MemoryGroup::print() { + llvm::dbgs() << "[MEM_GROUP] Group for ["; + bb->printAsOperand(llvm::dbgs()); + llvm::dbgs() << "]; predecessors = {"; + for (auto &gp : preds) { + gp->bb->printAsOperand(llvm::dbgs()); + llvm::dbgs() << ", "; + } + llvm::dbgs() << "}; successors = {"; + for (auto &gp : succs) { + gp->bb->printAsOperand(llvm::dbgs()); + llvm::dbgs() << ", "; + } + llvm::dbgs() << "} \n"; +} + +/// Given a list of operations, return the list of memory dependencies for +/// each block. This allows to build the group graph, which allows to +/// determine the dependencies between memory accesses inside basic blocks.
+/// Two types of hazards between the predecessors of one LSQ node: +/// (1) WAW between 2 Store operations, +/// (2) RAW and WAR between Load and Store operations +static SmallVector identifyMemoryDependencies( + handshake::FuncOp &funcOp, + const SmallVector &operations) { + + mlir::DominanceInfo domInfo; + ftd::BlockIndexing bi(funcOp.getRegion()); + mlir::CFGLoopInfo li(domInfo.getDomTree(&funcOp.getRegion())); + + SmallVector allMemDeps; + + // Returns true if findAllPaths finds at least one path from the block of `op1` to the block of `op2` + auto isThereAPath = [&](Operation *op1, Operation *op2) -> bool { + return !findAllPaths(op1->getBlock(), op2->getBlock(), bi).empty(); + }; + + // Returns true if the two operations are both loads + auto areBothLoad = [](Operation *op1, Operation *op2) { + return (isa(op1) && isa(op2)); + }; + + // Returns true if the two operations belong to the same block + auto isSameBlock = [](Operation *op1, Operation *op2) { + return (op1->getBlock() == op2->getBlock()); + }; + + // Given all the operations which are assigned to an LSQ, loop over them + // and skip those which are not memory operations + for (handshake::MemPortOpInterface i : operations) { + + // Loop over all the other operations in the LSQ. There is no dependency + // in the following cases: + // 1. One of them is not a memory operation; + // 2. The two operations are in the same group, thus they are in the same + // BB; + // 3. They are both load operations; + // 4. The operations are mutually exclusive (i.e.
there is no path which + // goes from i to j, nor from j to i); + for (handshake::MemPortOpInterface j : operations) { + + if (isSameBlock(i, j) || areBothLoad(i, j) || + (!isThereAPath(i, j) && !isThereAPath(j, i))) + continue; + + // Get the two blocks + Block *bbI = i->getBlock(), *bbJ = j->getBlock(); + + // If the relationship (bbJ producer, bbI consumer) was already present, + // then skip the pair of blocks + if (llvm::find_if(allMemDeps, [bbI, bbJ](ProdConsMemDep p) { + return p.prodBb == bbJ && p.consBb == bbI; + }) != allMemDeps.end()) + continue; + + // Insert a dependency only if index _j is smaller than index _i: in + // this case j is the producer, i is the consumer. If this doesn't + // hold, the dependency will be added when the two blocks are analyzed + // in the opposite direction + if (bi.isLess(bbJ, bbI)) { + allMemDeps.push_back(ProdConsMemDep(bbJ, bbI)); + + // If the two blocks are in the same loop, then bbI is also a + // producer, while bbJ is a consumer. This relationship is backward. + if (ftd::isSameLoopBlocks(bbI, bbJ, li)) + allMemDeps.push_back(ProdConsMemDep(bbI, bbJ)); + } + } + } + + return allMemDeps; +} + +/// Given a set of operations related to one LSQ and the memory dependency +/// information among them, create a group graph.
+static DenseSet +constructGroupsGraph(SmallVector &lsqOps, + SmallVector &lsqMemDeps) { + + DenseSet groups; + + // Given the operations related to the LSQ, create one group per distinct + // basic block (groups are allocated with `new`; the caller must delete them) + for (Operation *op : lsqOps) { + Block *b = op->getBlock(); + auto it = llvm::find_if(groups, [b](MemoryGroup *g) { return g->bb == b; }); + if (it == groups.end()) { + MemoryGroup *g = new MemoryGroup(b); + groups.insert(g); + } + } + + // If there exists a relationship O_i -> O_j, with O_i being in basic BB_i + // (thus group i) and O_j being in BB_j (thus in group j), add G_i to the + // predecessors of G_j, G_j to the successors of G_i + for (ProdConsMemDep memDep : lsqMemDeps) { + // Find the group related to the producer (the iterator is dereferenced unchecked: the block must have a group) + MemoryGroup *producerGroup = + *llvm::find_if(groups, [&memDep](const MemoryGroup *group) { + return group->bb == memDep.prodBb; + }); + + // Find the group related to the consumer (same unchecked dereference) + MemoryGroup *consumerGroup = + *llvm::find_if(groups, [&memDep](const MemoryGroup *group) { + return group->bb == memDep.consBb; + }); + + // Create the edges linking the two groups + producerGroup->succs.insert(consumerGroup); + consumerGroup->preds.insert(producerGroup); + } + + // Add a self dependency to every group which has no predecessor + for (MemoryGroup *g : groups) { + if (!g->preds.size()) { + g->preds.insert(g); + g->succs.insert(g); + } + } + + return groups; +} + +/// Minimizes the connections between groups based on dominance info. Let's +/// consider the graph +/// +/// B -> C -> D +/// | ^ +/// |---------| +/// +/// having B, C and D as groups, B being predecessor of both C and D, C of D. +/// Since C has to wait for B to be done, and D has to wait for C to be done, +/// there is no point in D waiting for B to be done.
For this reason, the +/// graph can be simplified, saving an edge: +/// +/// B -> C -> D +static void minimizeGroupsConnections(handshake::FuncOp funcOp, + DenseSet &groupsGraph) { + + // Get the dominance info for the region + DominanceInfo domInfo; + ftd::BlockIndexing bi(funcOp.getRegion()); + + // For each group, compare all the pairs of its predecessors. Cut the edge + // from the smaller-index predecessor to the group when the bigger-index one + // already has it as predecessor and properly dominates the group + for (auto &group : groupsGraph) { + // Predecessors to remove, collected first so that `preds` is not modified while it is iterated + DenseSet predsToRemove; + for (auto &bp : group->preds) { + + for (auto &sp : group->preds) { + + // skip pairs on the same block and pairs where sp has the greater index (bp must be the big one) + if (sp->bb == bp->bb || bi.isGreater(sp->bb, bp->bb)) + continue; + + // Add the small predecessors to the list of elements to remove in + // case the big predecessor has the small one among its + // predecessors, and the big predecessor's BB properly dominates the + // BB of the group currently under analysis + if ((bp->preds.find(sp) != bp->preds.end()) && + domInfo.properlyDominates(bp->bb, group->bb)) { + predsToRemove.insert(sp); + } + } + } + + for (auto *pred : predsToRemove) { + group->preds.erase(pred); + pred->succs.erase(group); + } + } +} + +/// For each group in the graph, build a lazy fork and use its output to feed +/// the corresponding input of the LSQ.
+static DenseMap +connectLSQToForkGraph(handshake::FuncOp &funcOp, + DenseSet &groups, handshake::LSQOp lsqOp, + PatternRewriter &rewriter) { + + DenseMap forksGraph; + auto startValue = (Value)funcOp.getArguments().back(); + + // Create the fork nodes: one for each group among the set of groups + for (MemoryGroup *group : groups) { + Block *bb = group->bb; + rewriter.setInsertionPointToStart(bb); + + // Add a lazy fork with two outputs, having the start control value as + // input and two output ports, one for the LSQ and one for the subsequent + // buffer + auto forkOp = rewriter.create(bb->front().getLoc(), + startValue, 2); + + // Record the fork created for this block in the fork graph (only this + // map is updated here) + forksGraph[bb] = forkOp; + } + + // The second output of each lazy fork must be connected to the LSQ, so that + // they can activate the allocation for the operations of the corresponding + // basic block + // + // For each input of the LSQ + for (auto [opIdx, op] : llvm::enumerate(lsqOp.getOperands())) { + // If it is not a cmerge, then continue + if (!llvm::isa_and_nonnull(op.getDefiningOp())) + continue; + + // Replace the input if it comes from a cmerge in the same block of a lazy + // fork of the graph + auto cmerge = llvm::dyn_cast(op.getDefiningOp()); + Block *bb = cmerge->getBlock(); + if (!forksGraph.contains(bb)) + continue; + lsqOp.setOperand(opIdx, forksGraph[bb]->getResult(1)); + } + + return forksGraph; +} + +/// Collect the memory operations whose address output has the given LSQ operation among its users +static SmallVector +getLsqOps(handshake::FuncOp &funcOp, handshake::LSQOp lsqOp) { + SmallVector lsqOps; + + for (auto memOp : funcOp.getOps()) { + if (llvm::any_of(memOp.getAddressOutput().getUsers(), + [&](Operation *user) { return user == lsqOp; })) + lsqOps.push_back(memOp); + } + + return lsqOps; +} + +/// Given a graph of lazy forks, feed operand 0 of each consumer fork from result 0 of its +/// producer forks through ftd::createPhiNetworkDeps; fails as soon as one call fails +static LogicalResult +connectForkGraph(handshake::FuncOp
&funcOp, + const DenseSet &groupsGraph, + const DenseMap &forksGraph, + PatternRewriter &rewriter) { + + for (MemoryGroup *consumerGroup : groupsGraph) { + + DenseMap> deps; + SmallVector forkDeps; + + for (auto &producerGroup : consumerGroup->preds) { + Operation *producerLF = forksGraph.at(producerGroup->bb); + forkDeps.push_back(producerLF->getResult(0)); + } + + deps[&forksGraph.at(consumerGroup->bb)->getOpOperand(0)] = forkDeps; + + if (failed(ftd::createPhiNetworkDeps(funcOp.getRegion(), rewriter, deps))) + return failure(); + } + return success(); +} + +/// Remove the network of cmerges. NOTE(review): the body returns early when every `end` operand has the same type (what it calls a void function), so removal only happens for non-void functions - confirm which is intended. The SQ pass +/// guarantees that no network is required any longer. All the remaining +/// connections (to the number of store constants and to the memory controllers) +/// are substituted with start. +static void removeNetworkCMerges(handshake::FuncOp &funcOp, + PatternRewriter &rewriter) { + + // Get the first end operation of the function and its operands. + handshake::EndOp endOperation = *funcOp.getOps().begin(); + auto operands = endOperation->getOperands(); + + // Check if the function is void. This happens if all the types of the + // `end` operands are identical.
+ bool hasReturnValue = llvm::any_of(operands, [&](Value v) -> bool { + return v.getType() != operands[0].getType(); + }); + + // Cannot remove the network if the function is void + if (!hasReturnValue) + return; + + // The FTD circuit introduces no branch operations, so the ones in the circuit + // are only related to the network of cmerges + auto branchOps = funcOp.getOps(); + + if (branchOps.empty()) + return; + + // Operations that need to be removed + DenseSet operationsToRemove; + // Worklist of operations still to traverse; pop_back_val() below makes it LIFO (depth-first), not FIFO + SmallVector operationsToTraverse; + + // Seed both containers with the first branch operation + operationsToTraverse.push_back((Operation *)(*branchOps.begin())); + operationsToRemove.insert(operationsToTraverse.front()); + + auto startValue = (Value)funcOp.getArguments().back(); + + // While there are operations to be traversed + while (!operationsToTraverse.empty()) { + Operation *toTraverse = operationsToTraverse.pop_back_val(); + + // The network is made of control merges, branches and conditional branches: + // ignore anything else. + if (!(llvm::isa(toTraverse) || + llvm::isa(toTraverse) || + llvm::isa(toTraverse))) { + continue; + } + + // The operation needs to be removed + operationsToRemove.insert(toTraverse); + + // Consider all the users of all the results of the operation, and add them to + // the operations to traverse unless they are already marked for removal + for (auto result : toTraverse->getResults()) { + for (auto *user : result.getUsers()) { + if (!operationsToRemove.contains(user)) + operationsToTraverse.push_back(user); + } + } + + // Since the operation is to be removed, its results are not needed anymore, + // and can be substituted with `start`. This is useful to guarantee that no + // operation is in use once it gets removed.
+ + // Substitute the control merge result with start (NOTE(review): only getResult() is rewired; confirm no other cmerge result still has users at erase time) + if (auto cmerge = llvm::dyn_cast(toTraverse); + cmerge) + rewriter.replaceAllUsesWith(cmerge.getResult(), startValue); + + // Substitute the branch result with start + if (auto br = llvm::dyn_cast(toTraverse); br) + rewriter.replaceAllUsesWith(br.getResult(), startValue); + + // Substitute both conditional branch results (true and false) with start + if (auto condBr = + llvm::dyn_cast(toTraverse); + condBr) { + rewriter.replaceAllUsesWith(condBr.getTrueResult(), startValue); + rewriter.replaceAllUsesWith(condBr.getFalseResult(), startValue); + } + } + + for (auto *toRemove : operationsToRemove) + toRemove->erase(); +} + +/// Run straight to the queue on one function; returns failure as soon as one of the steps fails. +static LogicalResult applyStraightToQueue(handshake::FuncOp funcOp, + MLIRContext *ctx) { + + ConversionPatternRewriter rewriter(ctx); + + // If there are no LSQs in the function, only try to remove the cmerge network and return + if (funcOp.getOps().empty()) { + removeNetworkCMerges(funcOp, rewriter); + return success(); + } + + // Restore the cf structure to work on a structured IR + if (failed(cfg::restoreCfStructure(funcOp, rewriter))) + return failure(); + + // For each LSQ + for (const handshake::LSQOp lsqOp : funcOp.getOps()) { + + // Collect all the operations related to that LSQ + auto lsqOps = getLsqOps(funcOp, lsqOp); + + // Get all the memory dependencies among the operations connected to the + // same LSQ (NOTE(review): each one is printed to llvm::dbgs() with no guard around the call) + auto lsqMemDeps = identifyMemoryDependencies(funcOp, lsqOps); + for (auto &dep : lsqMemDeps) + dep.print(); + + // Build a group graph out of the dependencies + auto groupsGraph = constructGroupsGraph(lsqOps, lsqMemDeps); + + // Apply group minimization techniques + minimizeGroupsConnections(funcOp, groupsGraph); + + for (auto &g : groupsGraph) + g->print(); + + // Build a lazy fork for each group and connect it to the related + // activation input in the LSQ + auto forksGraph = + connectLSQToForkGraph(funcOp, groupsGraph, lsqOp, rewriter); + + // Connect the lazy forks together through a
network of merges + if (failed(connectForkGraph(funcOp, groupsGraph, forksGraph, rewriter))) + return failure(); + + // Delete the groups (NOTE(review): the failure return just above skips this loop, so the groups leak on that path) + for (auto *g : groupsGraph) + delete g; + } + + // Replace each merge created by `createPhiNetwork` with a multiplexer + if (failed(ftd::replaceMergeToGSA(funcOp, rewriter))) + return failure(); + + // Run fast token delivery on the newly inserted operations + experimental::ftd::addRegen(funcOp, rewriter); + experimental::ftd::addSupp(funcOp, rewriter); + experimental::cfg::markBasicBlocks(funcOp, rewriter); + + // Try to remove the network of cmerges if possible (NOTE(review): the helper returns early when all + // `end` operand types match, i.e. for what it calls a void function - confirm the intent) + removeNetworkCMerges(funcOp, rewriter); + + // Remove the blocks and terminators + if (failed(cfg::flattenFunction(funcOp))) + return failure(); + + return success(); +} + +struct HandshakeStraightToQueuePass + : public dynamatic::experimental::ftd::impl::HandshakeStraightToQueueBase< + HandshakeStraightToQueuePass> { + + void runDynamaticPass() override { + MLIRContext *ctx = &getContext(); + mlir::ModuleOp module = getOperation(); + + for (auto funcOp : module.getOps()) + if (failed(applyStraightToQueue(funcOp, ctx))) + signalPassFailure(); + }; +}; +} // namespace + +std::unique_ptr +dynamatic::experimental::ftd::createStraightToQueue() { + return std::make_unique(); +} diff --git a/tools/dynamatic/dynamatic.cpp b/tools/dynamatic/dynamatic.cpp index 9f2021eb7b..3e034042a5 100644 --- a/tools/dynamatic/dynamatic.cpp +++ b/tools/dynamatic/dynamatic.cpp @@ -283,6 +283,7 @@ class Compile : public Command { static constexpr llvm::StringLiteral SHARING = "sharing"; static constexpr llvm::StringLiteral RIGIDIFICATION = "rigidification"; static constexpr llvm::StringLiteral DISABLE_LSQ = "disable-lsq"; + static constexpr llvm::StringLiteral STRAIGHT_TO_QUEUE = "straight-to-queue"; Compile(FrontendState &state) : Command("compile", @@ -304,6 +305,8 @@ class Compile : public Command { addFlag({DISABLE_LSQ, "Force usage of memory
controllers instead of LSQs. " "Warning: This may result in out-of-order memory " "accesses, use with caution!"}); + addFlag({STRAIGHT_TO_QUEUE, + "Use straight to queue to connect the circuit to the LSQ"}); } CommandResult execute(CommandArguments &args) override; @@ -637,6 +640,8 @@ CommandResult Compile::execute(CommandArguments &args) { std::string buffers = "on-merges"; std::string fastTokenDelivery = args.flags.contains(FAST_TOKEN_DELIVERY) ? "1" : "0"; + std::string straightToQueue = + args.flags.contains(STRAIGHT_TO_QUEUE) ? "1" : "0"; if (auto it = args.options.find(BUFFER_ALGORITHM); it != args.options.end()) { if (it->second == "on-merges" || it->second == "fpga20" || @@ -665,7 +670,7 @@ CommandResult Compile::execute(CommandArguments &args) { state.getOutputDir(), state.getKernelName(), buffers, floatToString(state.targetCP, 3), state.polygeistPath, sharing, state.fpUnitsGenerator, rigidification, disableLSQ, - fastTokenDelivery); + fastTokenDelivery, straightToQueue); } CommandResult WriteHDL::execute(CommandArguments &args) { diff --git a/tools/dynamatic/scripts/compile.sh b/tools/dynamatic/scripts/compile.sh index 7d4ac4652c..382d550819 100755 --- a/tools/dynamatic/scripts/compile.sh +++ b/tools/dynamatic/scripts/compile.sh @@ -19,6 +19,7 @@ FPUNITS_GEN=$9 USE_RIGIDIFICATION=${10} DISABLE_LSQ=${11} FAST_TOKEN_DELIVERY=${12} +STRAIGHT_TO_QUEUE=${13} POLYGEIST_CLANG_BIN="$POLYGEIST_PATH/build/bin/cgeist" CLANGXX_BIN="$POLYGEIST_PATH/llvm-project/build/bin/clang++" @@ -45,6 +46,7 @@ F_HANDSHAKE_TRANSFORMED="$COMP_DIR/handshake_transformed.mlir" F_HANDSHAKE_BUFFERED="$COMP_DIR/handshake_buffered.mlir" F_HANDSHAKE_EXPORT="$COMP_DIR/handshake_export.mlir" F_HANDSHAKE_RIGIDIFIED="$COMP_DIR/handshake_rigidified.mlir" +F_HANDSHAKE_SQ="$COMP_DIR/handshake_sq.mlir" F_HW="$COMP_DIR/hw.mlir" F_FREQUENCIES="$COMP_DIR/frequencies.csv" @@ -165,14 +167,40 @@ else exit_on_fail "Failed to compile cf to handshake" "Compiled cf to handshake" fi -# handshake 
transformations -"$DYNAMATIC_OPT_BIN" "$F_HANDSHAKE" \ - --handshake-analyze-lsq-usage --handshake-replace-memory-interfaces \ - --handshake-minimize-cst-width --handshake-optimize-bitwidths \ - --handshake-materialize --handshake-infer-basic-blocks \ - > "$F_HANDSHAKE_TRANSFORMED" -exit_on_fail "Failed to apply transformations to handshake" \ - "Applied transformations to handshake" +if [[ $STRAIGHT_TO_QUEUE -ne 0 ]]; then + + echo_info "Using FPGA'23 for LSQ connection" + + # FPT19 should run before straight to the queue, so that no useless components are instantiated. + "$DYNAMATIC_OPT_BIN" "$F_HANDSHAKE" \ + --handshake-analyze-lsq-usage \ + --handshake-replace-memory-interfaces \ + --handshake-straight-to-queue \ + --handshake-combine-steering-logic \ + > "$F_HANDSHAKE_SQ" + exit_on_fail "Failed to apply Straight to the Queue" "Applied Straight to the Queue" + + F_HANDSHAKE=$F_HANDSHAKE_SQ + + # handshake transformations + "$DYNAMATIC_OPT_BIN" "$F_HANDSHAKE" \ + --handshake-minimize-cst-width --handshake-optimize-bitwidths \ + --handshake-materialize --handshake-infer-basic-blocks \ + > "$F_HANDSHAKE_TRANSFORMED" + exit_on_fail "Failed to apply transformations to handshake" \ + "Applied transformations to handshake" + +else + + # handshake transformations + "$DYNAMATIC_OPT_BIN" "$F_HANDSHAKE" \ + --handshake-analyze-lsq-usage --handshake-replace-memory-interfaces \ + --handshake-minimize-cst-width --handshake-optimize-bitwidths \ + --handshake-materialize --handshake-infer-basic-blocks \ + > "$F_HANDSHAKE_TRANSFORMED" + exit_on_fail "Failed to apply transformations to handshake" \ + "Applied transformations to handshake" +fi # Credit-based sharing if [[ $USE_SHARING -ne 0 ]]; then