Skip to content

Commit 7743e68

Browse files
authored
Merge pull request #3244 from AlexandreSinger/feature-ap-full-none-3d
[APPack][3D] 3D APPack Support
2 parents 51c025a + ce8be01 commit 7743e68

File tree

8 files changed

+148
-45
lines changed

8 files changed

+148
-45
lines changed

vpr/src/analytical_place/global_placer.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,15 @@ PartialPlacement SimPLGlobalPlacer::place() {
411411

412412
// Exit condition: If the upper-bound and lower-bound HPWLs are
413413
// sufficiently close together then stop.
414-
double hpwl_relative_gap = (ub_hpwl - lb_hpwl) / ub_hpwl;
414+
double hpwl_gap = ub_hpwl - lb_hpwl;
415+
double hpwl_relative_gap;
416+
if (ub_hpwl != 0.0)
417+
hpwl_relative_gap = hpwl_gap / ub_hpwl;
418+
else if (lb_hpwl != 0.0)
419+
hpwl_relative_gap = hpwl_gap / lb_hpwl;
420+
else
421+
hpwl_relative_gap = 0.0;
422+
415423
if (hpwl_relative_gap < target_hpwl_relative_gap_)
416424
break;
417425
}

vpr/src/base/flat_placement_utils.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,14 @@ inline float get_manhattan_distance_to_tile(const t_flat_pl_loc& src_flat_loc,
4141
// the src_flat_loc. To do this, we project the point in L1 space.
4242
float proj_x = std::clamp(src_flat_loc.x, tile_xmin, tile_xmax);
4343
float proj_y = std::clamp(src_flat_loc.y, tile_ymin, tile_ymax);
44+
// Note: We assume that tiles do not cross layers, so the projected layer
45+
// is just the layer that contains the tile.
46+
float proj_layer = tile_loc.layer_num;
4447

4548
// Then compute the L1 distance from the src_flat_loc to the projected
4649
// position. This will be the minimum distance this point needs to move.
4750
float dx = std::abs(proj_x - src_flat_loc.x);
4851
float dy = std::abs(proj_y - src_flat_loc.y);
49-
return dx + dy;
52+
float dlayer = std::abs(proj_layer - src_flat_loc.layer);
53+
return dx + dy + dlayer;
5054
}

vpr/src/pack/greedy_candidate_selector.cpp

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -153,23 +153,23 @@ void GreedyCandidateSelector::initialize_unrelated_clustering_data(const t_molec
153153
max_loc.layer = std::max(max_loc.layer, mol_pos.layer);
154154
}
155155

156-
VTR_ASSERT_MSG(max_loc.layer == 0,
157-
"APPack unrelated clustering does not support 3D "
158-
"FPGAs yet");
159-
160156
// Initialize the data structure with empty arrays with enough space
161157
// for each molecule.
158+
size_t flat_grid_num_layers = max_loc.layer + 1;
162159
size_t flat_grid_width = max_loc.x + 1;
163160
size_t flat_grid_height = max_loc.y + 1;
164161
appack_unrelated_clustering_data_ =
165-
vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 2>({flat_grid_width,
162+
vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 3>({flat_grid_num_layers,
163+
flat_grid_width,
166164
flat_grid_height});
167-
for (size_t x = 0; x < flat_grid_width; x++) {
168-
for (size_t y = 0; y < flat_grid_height; y++) {
169-
// Resize to the maximum number of used external pins. This is
170-
// to ensure that every molecule below can be inserted into a
171-
// valid list based on their number of external pins.
172-
appack_unrelated_clustering_data_[x][y].resize(max_molecule_stats.num_used_ext_pins + 1);
165+
for (size_t layer_num = 0; layer_num < flat_grid_num_layers; layer_num++) {
166+
for (size_t x = 0; x < flat_grid_width; x++) {
167+
for (size_t y = 0; y < flat_grid_height; y++) {
168+
// Resize to the maximum number of used external pins. This is
169+
// to ensure that every molecule below can be inserted into a
170+
// valid list based on their number of external pins.
171+
appack_unrelated_clustering_data_[layer_num][x][y].resize(max_molecule_stats.num_used_ext_pins + 1);
172+
}
173173
}
174174
}
175175

@@ -185,7 +185,7 @@ void GreedyCandidateSelector::initialize_unrelated_clustering_data(const t_molec
185185
int ext_inps = molecule_stats.num_used_ext_inputs;
186186

187187
//Insert the molecule into the unclustered lists by number of external inputs
188-
auto& tile_uc_data = appack_unrelated_clustering_data_[mol_pos.x][mol_pos.y];
188+
auto& tile_uc_data = appack_unrelated_clustering_data_[mol_pos.layer][mol_pos.x][mol_pos.y];
189189
tile_uc_data[ext_inps].push_back(mol_id);
190190
}
191191
} else {
@@ -1258,21 +1258,33 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa
12581258
// to the max number of inputs a molecule could have.
12591259
size_t inputs_avail = cluster_legalizer.get_num_cluster_inputs_available(cluster_id);
12601260
VTR_ASSERT_SAFE(!appack_unrelated_clustering_data_.empty());
1261-
size_t max_molecule_inputs_avail = appack_unrelated_clustering_data_[0][0].size() - 1;
1261+
size_t max_molecule_inputs_avail = appack_unrelated_clustering_data_[0][0][0].size() - 1;
1262+
size_t flat_grid_num_layers = appack_unrelated_clustering_data_.dim_size(0);
1263+
size_t flat_grid_width = appack_unrelated_clustering_data_.dim_size(1);
1264+
size_t flat_grid_height = appack_unrelated_clustering_data_.dim_size(2);
12621265
if (inputs_avail >= max_molecule_inputs_avail) {
12631266
inputs_avail = max_molecule_inputs_avail;
12641267
}
12651268

12661269
// Create a queue of locations to search and a map of visited grid locations.
12671270
std::queue<t_physical_tile_loc> search_queue;
1268-
vtr::NdMatrix<bool, 2> visited({appack_unrelated_clustering_data_.dim_size(0),
1269-
appack_unrelated_clustering_data_.dim_size(1)},
1270-
false);
1271-
// Push the position of the cluster to the queue.
1271+
vtr::NdMatrix<bool, 3> visited({flat_grid_num_layers,
1272+
flat_grid_width,
1273+
flat_grid_height},
1274+
false);
1275+
12721276
t_physical_tile_loc cluster_tile_loc(cluster_gain_stats.flat_cluster_position.x,
12731277
cluster_gain_stats.flat_cluster_position.y,
12741278
cluster_gain_stats.flat_cluster_position.layer);
1275-
search_queue.push(cluster_tile_loc);
1279+
1280+
// Push the position of the cluster to the queue. We push this position on
1281+
// each layer such that each layer is searched independently.
1282+
for (size_t layer_num = 0; layer_num < flat_grid_num_layers; layer_num++) {
1283+
t_physical_tile_loc tile_loc(cluster_tile_loc.x,
1284+
cluster_tile_loc.y,
1285+
layer_num);
1286+
search_queue.push(tile_loc);
1287+
}
12761288

12771289
// Get the max unrelated tile distance for the block type of this cluster.
12781290
t_logical_block_type_ptr cluster_type = cluster_legalizer.get_cluster_type(cluster_id);
@@ -1288,10 +1300,12 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa
12881300
while (!search_queue.empty()) {
12891301
// Pop a position to search from the queue.
12901302
const t_physical_tile_loc& node_loc = search_queue.front();
1291-
VTR_ASSERT_SAFE(node_loc.layer_num == 0);
12921303

12931304
// Get the distance from the cluster to the current tile in tiles.
1294-
float dist = std::abs(node_loc.x - cluster_tile_loc.x) + std::abs(node_loc.y - cluster_tile_loc.y);
1305+
float node_dx = std::abs(node_loc.x - cluster_tile_loc.x);
1306+
float node_dy = std::abs(node_loc.y - cluster_tile_loc.y);
1307+
float node_dlayer = std::abs(node_loc.layer_num - cluster_tile_loc.layer_num);
1308+
float dist = node_dx + node_dy + node_dlayer;
12951309

12961310
// If this position is too far from the source, skip it.
12971311
if (dist > max_dist) {
@@ -1309,18 +1323,18 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa
13091323
}
13101324

13111325
// If this position has been visited, skip it.
1312-
if (visited[node_loc.x][node_loc.y]) {
1326+
if (visited[node_loc.layer_num][node_loc.x][node_loc.y]) {
13131327
search_queue.pop();
13141328
continue;
13151329
}
1316-
visited[node_loc.x][node_loc.y] = true;
1330+
visited[node_loc.layer_num][node_loc.x][node_loc.y] = true;
13171331

13181332
// Explore this position from highest number of inputs available to lowest.
13191333
// Here, we are trying to find the closest compatible molecule, where we
13201334
// break ties based on whoever has more external inputs.
13211335
PackMoleculeId best_candidate = PackMoleculeId::INVALID();
13221336
float best_candidate_distance = std::numeric_limits<float>::max();
1323-
const auto& uc_data = appack_unrelated_clustering_data_[node_loc.x][node_loc.y];
1337+
const auto& uc_data = appack_unrelated_clustering_data_[node_loc.layer_num][node_loc.x][node_loc.y];
13241338
VTR_ASSERT_SAFE(inputs_avail < uc_data.size());
13251339
for (int ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) {
13261340
// Get the molecule by the number of external inputs.

vpr/src/pack/greedy_candidate_selector.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -596,14 +596,14 @@ class GreedyCandidateSelector {
596596
/// @brief Data pre-computed to help select unrelated molecules when APPack
597597
/// is being used. This is the same data as unrelated_clustering_data_,
598598
/// but it is spatially distributed over the device.
599-
/// For each grid location on the device (x, y), this provides a list of
599+
/// For each grid location on the device (layer, x, y), this provides a list of
600600
/// molecules sorted by their gain, where the first dimension is the number
601601
/// of used external inputs of the molecule.
602602
/// When APPack is not used, this will be uninitialized.
603-
/// [0..flat_grid_width][0..flat_grid_height][0..max_num_used_ext_pins]
603+
/// [0..flat_grid_num_layers][0..flat_grid_width][0..flat_grid_height][0..max_num_used_ext_pins]
604604
/// Here, flat_grid num_layers/width/height are one more than the maximum
605605
/// layer, x, and y positions given in the flat placement.
606-
vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 2> appack_unrelated_clustering_data_;
606+
vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 3> appack_unrelated_clustering_data_;
607607

608608
/// @brief The APPack state which contains the options used to configure
609609
/// APPack and the flat placement.

vpr/src/place/initial_placement.cpp

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -738,35 +738,39 @@ static inline t_pl_loc find_nearest_compatible_loc(const t_flat_pl_loc& src_flat
738738
const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index];
739739
const DeviceGrid& device_grid = g_vpr_ctx.device().grid;
740740
const int num_layers = device_grid.get_num_layers();
741-
// This method does not support 3D FPGAs yet. The search performed will only
742-
// traverse the same layer as the src_loc.
743-
VTR_ASSERT(num_layers == 1);
744-
constexpr int layer = 0;
745-
746-
// Get the closest (approximately) compressed location to the src location.
747-
// This does not need to be perfect (in fact I do not think it is), but the
748-
// closer it is, the faster the BFS will find the best solution.
749-
t_physical_tile_loc src_grid_loc(src_flat_loc.x, src_flat_loc.y, src_flat_loc.layer);
750-
const t_physical_tile_loc compressed_src_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx(src_grid_loc);
751741

752742
// Weighted-BFS search the compressed grid for an empty compatible subtile.
753-
size_t num_rows = compressed_block_grid.get_num_rows(layer);
754-
size_t num_cols = compressed_block_grid.get_num_columns(layer);
755-
vtr::NdMatrix<bool, 2> visited({num_cols, num_rows}, false);
743+
std::vector<vtr::NdMatrix<bool, 2>> per_layer_visited(num_layers);
744+
for (int layer = 0; layer < num_layers; layer++) {
745+
size_t num_rows = compressed_block_grid.get_num_rows(layer);
746+
size_t num_cols = compressed_block_grid.get_num_columns(layer);
747+
per_layer_visited[layer].resize({num_cols, num_rows}, false);
748+
}
756749
float best_dist = std::numeric_limits<float>::max();
757750
t_pl_loc best_loc(OPEN, OPEN, OPEN, OPEN);
758751

752+
// Get the closest (approximately) compressed location to the src location
753+
// on each layer and enqueue them. We only want to enqueue locations onto
754+
// layers that can feasibly implement this block.
755+
// This does not need to be perfect (in fact I do not think it is), but the
756+
// closer it is, the faster the BFS will find the best solution.
759757
std::queue<t_physical_tile_loc> loc_queue;
760-
loc_queue.push(compressed_src_loc);
758+
for (int layer_num : compressed_block_grid.get_layer_nums()) {
759+
t_physical_tile_loc src_grid_loc(src_flat_loc.x, src_flat_loc.y, layer_num);
760+
const t_physical_tile_loc compressed_src_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx(src_grid_loc);
761+
if (compressed_src_loc.x != OPEN && compressed_src_loc.y != OPEN)
762+
loc_queue.push(compressed_src_loc);
763+
}
764+
761765
while (!loc_queue.empty()) {
762766
// Pop the top element off the queue.
763767
t_physical_tile_loc loc = loc_queue.front();
764768
loc_queue.pop();
765769

766770
// If this location has already been visited, skip it.
767-
if (visited[loc.x][loc.y])
771+
if (per_layer_visited[loc.layer_num][loc.x][loc.y])
768772
continue;
769-
visited[loc.x][loc.y] = true;
773+
per_layer_visited[loc.layer_num][loc.x][loc.y] = true;
770774

771775
// Get the minimum distance the cluster would need to move (relative to
772776
// its global placement solution) to be within the tile at the given
@@ -795,7 +799,7 @@ static inline t_pl_loc find_nearest_compatible_loc(const t_flat_pl_loc& src_flat
795799
// (i.e. no tile exists there). This is fine, we just need to check for
796800
// them to ensure we never try to put a cluster there.
797801
bool is_valid_compressed_loc = false;
798-
const auto& compressed_col_blk_map = compressed_block_grid.get_column_block_map(loc.x, layer);
802+
const auto& compressed_col_blk_map = compressed_block_grid.get_column_block_map(loc.x, loc.layer_num);
799803
if (compressed_col_blk_map.count(loc.y) != 0)
800804
is_valid_compressed_loc = true;
801805

@@ -837,6 +841,8 @@ static inline t_pl_loc find_nearest_compatible_loc(const t_flat_pl_loc& src_flat
837841
// been visited. The code above checks for these cases to prevent extra
838842
// work and invalid lookups. This must be done this way to ensure that
839843
// the closest location can be found efficiently.
844+
size_t num_rows = compressed_block_grid.get_num_rows(loc.layer_num);
845+
size_t num_cols = compressed_block_grid.get_num_columns(loc.layer_num);
840846
if (loc.x > 0) {
841847
t_physical_tile_loc new_comp_loc = t_physical_tile_loc(loc.x - 1,
842848
loc.y,
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
##############################################
2+
# Configuration file for running experiments
3+
##############################################
4+
5+
# Path to directory of circuits to use
6+
circuits_dir=benchmarks/blif/4
7+
8+
# Path to directory of architectures to use
9+
archs_dir=arch/multi_die/simple_arch
10+
11+
# Add architectures to list to sweep
12+
arch_list_add=3d_k4_N4_90nm.xml
13+
14+
# Add circuits to list to sweep
15+
# This is a sweep of blif files which pack to a density between 50% and 90% of
16+
# the max density on this device.
17+
circuit_list_add=s820.blif
18+
circuit_list_add=s838.1.blif
19+
circuit_list_add=bw.blif
20+
circuit_list_add=rd84.blif
21+
circuit_list_add=s832.blif
22+
circuit_list_add=mm9a.blif
23+
circuit_list_add=alu2.blif
24+
circuit_list_add=x1.blif
25+
circuit_list_add=t481.blif
26+
circuit_list_add=mm9b.blif
27+
circuit_list_add=styr.blif
28+
circuit_list_add=s953.blif
29+
30+
# Parse info and how to parse
31+
parse_file=vpr_fixed_chan_width.txt
32+
33+
# How to parse QoR info
34+
qor_parse_file=qor_ap_fixed_chan_width.txt
35+
36+
# Pass requirements
37+
pass_requirements_file=pass_requirements_ap_fixed_chan_width.txt
38+
39+
script_params_common=-starting_stage vpr -track_memory_usage --analytical_place --route --device FPGA3D --route_chan_width 100
40+
41+
# Test only the packer and the initial placer of the AP flow.
42+
script_params_list_add=--ap_analytical_solver identity --ap_partial_legalizer none
43+
# Force unrelated clustering on.
44+
script_params_list_add=--ap_analytical_solver identity --ap_partial_legalizer none --allow_unrelated_clustering on
45+

0 commit comments

Comments
 (0)