Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion vpr/src/analytical_place/global_placer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,15 @@ PartialPlacement SimPLGlobalPlacer::place() {

// Exit condition: If the upper-bound and lower-bound HPWLs are
// sufficiently close together then stop.
double hpwl_relative_gap = (ub_hpwl - lb_hpwl) / ub_hpwl;
double hpwl_gap = ub_hpwl - lb_hpwl;
double hpwl_relative_gap;
if (ub_hpwl != 0.0)
hpwl_relative_gap = hpwl_gap / ub_hpwl;
else if (lb_hpwl != 0.0)
hpwl_relative_gap = hpwl_gap / lb_hpwl;
else
hpwl_relative_gap = 0.0;

if (hpwl_relative_gap < target_hpwl_relative_gap_)
break;
}
Expand Down
6 changes: 5 additions & 1 deletion vpr/src/base/flat_placement_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,14 @@ inline float get_manhattan_distance_to_tile(const t_flat_pl_loc& src_flat_loc,
// the src_flat_loc. To do this, we project the point in L1 space.
float proj_x = std::clamp(src_flat_loc.x, tile_xmin, tile_xmax);
float proj_y = std::clamp(src_flat_loc.y, tile_ymin, tile_ymax);
// Note: We assume that tiles do not cross layers, so the projected layer
// is just the layer that contains the tile.
float proj_layer = tile_loc.layer_num;

// Then compute the L1 distance from the src_flat_loc to the projected
// position. This will be the minimum distance this point needs to move.
float dx = std::abs(proj_x - src_flat_loc.x);
float dy = std::abs(proj_y - src_flat_loc.y);
return dx + dy;
float dlayer = std::abs(proj_layer - src_flat_loc.layer);
return dx + dy + dlayer;
}
60 changes: 37 additions & 23 deletions vpr/src/pack/greedy_candidate_selector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,23 +153,23 @@ void GreedyCandidateSelector::initialize_unrelated_clustering_data(const t_molec
max_loc.layer = std::max(max_loc.layer, mol_pos.layer);
}

VTR_ASSERT_MSG(max_loc.layer == 0,
"APPack unrelated clustering does not support 3D "
"FPGAs yet");

// Initialize the data structure with empty arrays with enough space
// for each molecule.
size_t flat_grid_num_layers = max_loc.layer + 1;
size_t flat_grid_width = max_loc.x + 1;
size_t flat_grid_height = max_loc.y + 1;
appack_unrelated_clustering_data_ =
vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 2>({flat_grid_width,
vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 3>({flat_grid_num_layers,
flat_grid_width,
flat_grid_height});
for (size_t x = 0; x < flat_grid_width; x++) {
for (size_t y = 0; y < flat_grid_height; y++) {
// Resize to the maximum number of used external pins. This is
// to ensure that every molecule below can be inserted into a
// valid list based on their number of external pins.
appack_unrelated_clustering_data_[x][y].resize(max_molecule_stats.num_used_ext_pins + 1);
for (size_t layer_num = 0; layer_num < flat_grid_num_layers; layer_num++) {
for (size_t x = 0; x < flat_grid_width; x++) {
for (size_t y = 0; y < flat_grid_height; y++) {
// Resize to the maximum number of used external pins. This is
// to ensure that every molecule below can be inserted into a
// valid list based on their number of external pins.
appack_unrelated_clustering_data_[layer_num][x][y].resize(max_molecule_stats.num_used_ext_pins + 1);
}
}
}

Expand All @@ -185,7 +185,7 @@ void GreedyCandidateSelector::initialize_unrelated_clustering_data(const t_molec
int ext_inps = molecule_stats.num_used_ext_inputs;

//Insert the molecule into the unclustered lists by number of external inputs
auto& tile_uc_data = appack_unrelated_clustering_data_[mol_pos.x][mol_pos.y];
auto& tile_uc_data = appack_unrelated_clustering_data_[mol_pos.layer][mol_pos.x][mol_pos.y];
tile_uc_data[ext_inps].push_back(mol_id);
}
} else {
Expand Down Expand Up @@ -1258,21 +1258,33 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa
// to the max number of inputs a molecule could have.
size_t inputs_avail = cluster_legalizer.get_num_cluster_inputs_available(cluster_id);
VTR_ASSERT_SAFE(!appack_unrelated_clustering_data_.empty());
size_t max_molecule_inputs_avail = appack_unrelated_clustering_data_[0][0].size() - 1;
size_t max_molecule_inputs_avail = appack_unrelated_clustering_data_[0][0][0].size() - 1;
size_t flat_grid_num_layers = appack_unrelated_clustering_data_.dim_size(0);
size_t flat_grid_width = appack_unrelated_clustering_data_.dim_size(1);
size_t flat_grid_height = appack_unrelated_clustering_data_.dim_size(2);
if (inputs_avail >= max_molecule_inputs_avail) {
inputs_avail = max_molecule_inputs_avail;
}

// Create a queue of locations to search and a map of visited grid locations.
std::queue<t_physical_tile_loc> search_queue;
vtr::NdMatrix<bool, 2> visited({appack_unrelated_clustering_data_.dim_size(0),
appack_unrelated_clustering_data_.dim_size(1)},
false);
// Push the position of the cluster to the queue.
vtr::NdMatrix<bool, 3> visited({flat_grid_num_layers,
flat_grid_width,
flat_grid_height},
false);

t_physical_tile_loc cluster_tile_loc(cluster_gain_stats.flat_cluster_position.x,
cluster_gain_stats.flat_cluster_position.y,
cluster_gain_stats.flat_cluster_position.layer);
search_queue.push(cluster_tile_loc);

// Push the position of the cluster to the queue. We push this position on
// each layer such that each layer is searched independently.
for (size_t layer_num = 0; layer_num < flat_grid_num_layers; layer_num++) {
t_physical_tile_loc tile_loc(cluster_tile_loc.x,
cluster_tile_loc.y,
layer_num);
search_queue.push(tile_loc);
}

// Get the max unrelated tile distance for the block type of this cluster.
t_logical_block_type_ptr cluster_type = cluster_legalizer.get_cluster_type(cluster_id);
Expand All @@ -1288,10 +1300,12 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa
while (!search_queue.empty()) {
// Pop a position to search from the queue.
const t_physical_tile_loc& node_loc = search_queue.front();
VTR_ASSERT_SAFE(node_loc.layer_num == 0);

// Get the distance from the cluster to the current tile in tiles.
float dist = std::abs(node_loc.x - cluster_tile_loc.x) + std::abs(node_loc.y - cluster_tile_loc.y);
float node_dx = std::abs(node_loc.x - cluster_tile_loc.x);
float node_dy = std::abs(node_loc.y - cluster_tile_loc.y);
float node_dlayer = std::abs(node_loc.layer_num - cluster_tile_loc.layer_num);
float dist = node_dx + node_dy + node_dlayer;

// If this position is too far from the source, skip it.
if (dist > max_dist) {
Expand All @@ -1309,18 +1323,18 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa
}

// If this position has been visited, skip it.
if (visited[node_loc.x][node_loc.y]) {
if (visited[node_loc.layer_num][node_loc.x][node_loc.y]) {
search_queue.pop();
continue;
}
visited[node_loc.x][node_loc.y] = true;
visited[node_loc.layer_num][node_loc.x][node_loc.y] = true;

// Explore this position from highest number of inputs available to lowest.
// Here, we are trying to find the closest compatible molecule, where we
// break ties based on whoever has more external inputs.
PackMoleculeId best_candidate = PackMoleculeId::INVALID();
float best_candidate_distance = std::numeric_limits<float>::max();
const auto& uc_data = appack_unrelated_clustering_data_[node_loc.x][node_loc.y];
const auto& uc_data = appack_unrelated_clustering_data_[node_loc.layer_num][node_loc.x][node_loc.y];
VTR_ASSERT_SAFE(inputs_avail < uc_data.size());
for (int ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) {
// Get the molecule by the number of external inputs.
Expand Down
6 changes: 3 additions & 3 deletions vpr/src/pack/greedy_candidate_selector.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,14 +596,14 @@ class GreedyCandidateSelector {
/// @brief Data pre-computed to help select unrelated molecules when APPack
/// is being used. This is the same data as unrelated_clustering_data_,
/// but it is spatially distributed over the device.
/// For each grid location on the device (x, y), this provides a list of
/// For each grid location on the device (layer, x, y), this provides a list of
/// molecules sorted by their gain, where the first dimension is the number
/// of used external inputs of the molecule.
/// When APPack is not used, this will be uninitialized.
/// [0..flat_grid_width][0..flat_grid_height][0..max_num_used_ext_pins]
/// [0..flat_grid_num_layers][0..flat_grid_width][0..flat_grid_height][0..max_num_used_ext_pins]
/// Here, flat_grid num_layers/width/height are one more than the maximum
/// layer, x, and y positions given in the flat placement.
vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 2> appack_unrelated_clustering_data_;
vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 3> appack_unrelated_clustering_data_;

/// @brief The APPack state which contains the options used to configure
/// APPack and the flat placement.
Expand Down
40 changes: 23 additions & 17 deletions vpr/src/place/initial_placement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -738,35 +738,39 @@ static inline t_pl_loc find_nearest_compatible_loc(const t_flat_pl_loc& src_flat
const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index];
const DeviceGrid& device_grid = g_vpr_ctx.device().grid;
const int num_layers = device_grid.get_num_layers();
// This method does not support 3D FPGAs yet. The search performed will only
// traverse the same layer as the src_loc.
VTR_ASSERT(num_layers == 1);
constexpr int layer = 0;

// Get the closest (approximately) compressed location to the src location.
// This does not need to be perfect (in fact I do not think it is), but the
// closer it is, the faster the BFS will find the best solution.
t_physical_tile_loc src_grid_loc(src_flat_loc.x, src_flat_loc.y, src_flat_loc.layer);
const t_physical_tile_loc compressed_src_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx(src_grid_loc);

// Weighted-BFS search the compressed grid for an empty compatible subtile.
size_t num_rows = compressed_block_grid.get_num_rows(layer);
size_t num_cols = compressed_block_grid.get_num_columns(layer);
vtr::NdMatrix<bool, 2> visited({num_cols, num_rows}, false);
std::vector<vtr::NdMatrix<bool, 2>> per_layer_visited(num_layers);
for (int layer = 0; layer < num_layers; layer++) {
size_t num_rows = compressed_block_grid.get_num_rows(layer);
size_t num_cols = compressed_block_grid.get_num_columns(layer);
per_layer_visited[layer].resize({num_cols, num_rows}, false);
}
float best_dist = std::numeric_limits<float>::max();
t_pl_loc best_loc(OPEN, OPEN, OPEN, OPEN);

// Get the closest (approximately) compressed location to the src location
// on each layer and enqueue them. We only want to enqueue locations onto
// layers that can feasibly implement this block.
// This does not need to be perfect (in fact I do not think it is), but the
// closer it is, the faster the BFS will find the best solution.
std::queue<t_physical_tile_loc> loc_queue;
loc_queue.push(compressed_src_loc);
for (int layer_num : compressed_block_grid.get_layer_nums()) {
t_physical_tile_loc src_grid_loc(src_flat_loc.x, src_flat_loc.y, layer_num);
const t_physical_tile_loc compressed_src_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx(src_grid_loc);
if (compressed_src_loc.x != OPEN && compressed_src_loc.y != OPEN)
loc_queue.push(compressed_src_loc);
}

while (!loc_queue.empty()) {
// Pop the top element off the queue.
t_physical_tile_loc loc = loc_queue.front();
loc_queue.pop();

// If this location has already been visited, skip it.
if (visited[loc.x][loc.y])
if (per_layer_visited[loc.layer_num][loc.x][loc.y])
continue;
visited[loc.x][loc.y] = true;
per_layer_visited[loc.layer_num][loc.x][loc.y] = true;

// Get the minimum distance the cluster would need to move (relative to
// its global placement solution) to be within the tile at the given
Expand Down Expand Up @@ -795,7 +799,7 @@ static inline t_pl_loc find_nearest_compatible_loc(const t_flat_pl_loc& src_flat
// (i.e. no tile exists there). This is fine, we just need to check for
// them to ensure we never try to put a cluster there.
bool is_valid_compressed_loc = false;
const auto& compressed_col_blk_map = compressed_block_grid.get_column_block_map(loc.x, layer);
const auto& compressed_col_blk_map = compressed_block_grid.get_column_block_map(loc.x, loc.layer_num);
if (compressed_col_blk_map.count(loc.y) != 0)
is_valid_compressed_loc = true;

Expand Down Expand Up @@ -837,6 +841,8 @@ static inline t_pl_loc find_nearest_compatible_loc(const t_flat_pl_loc& src_flat
// been visited. The code above checks for these cases to prevent extra
// work and invalid lookups. This must be done this way to ensure that
// the closest location can be found efficiently.
size_t num_rows = compressed_block_grid.get_num_rows(loc.layer_num);
size_t num_cols = compressed_block_grid.get_num_columns(loc.layer_num);
if (loc.x > 0) {
t_physical_tile_loc new_comp_loc = t_physical_tile_loc(loc.x - 1,
loc.y,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
##############################################
# Configuration file for running experiments
##############################################

# Path to directory of circuits to use
circuits_dir=benchmarks/blif/4

# Path to directory of architectures to use
archs_dir=arch/multi_die/simple_arch

# Add architectures to list to sweep
arch_list_add=3d_k4_N4_90nm.xml

# Add circuits to list to sweep
# This is a sweep of blif files which pack to a density between 50% and 90% of
# the max density on this device.
circuit_list_add=s820.blif
circuit_list_add=s838.1.blif
circuit_list_add=bw.blif
circuit_list_add=rd84.blif
circuit_list_add=s832.blif
circuit_list_add=mm9a.blif
circuit_list_add=alu2.blif
circuit_list_add=x1.blif
circuit_list_add=t481.blif
circuit_list_add=mm9b.blif
circuit_list_add=styr.blif
circuit_list_add=s953.blif

# Parse info and how to parse
parse_file=vpr_fixed_chan_width.txt

# How to parse QoR info
qor_parse_file=qor_ap_fixed_chan_width.txt

# Pass requirements
pass_requirements_file=pass_requirements_ap_fixed_chan_width.txt

script_params_common=-starting_stage vpr -track_memory_usage --analytical_place --route --device FPGA3D --route_chan_width 100

# Test only the packer and the initial placer of the AP flow.
script_params_list_add=--ap_analytical_solver identity --ap_partial_legalizer none
# Force unrelated clustering on.
script_params_list_add=--ap_analytical_solver identity --ap_partial_legalizer none --allow_unrelated_clustering on

Loading