@@ -153,23 +153,23 @@ void GreedyCandidateSelector::initialize_unrelated_clustering_data(const t_molec
153
153
max_loc.layer = std::max (max_loc.layer , mol_pos.layer );
154
154
}
155
155
156
- VTR_ASSERT_MSG (max_loc.layer == 0 ,
157
- " APPack unrelated clustering does not support 3D "
158
- " FPGAs yet" );
159
-
160
156
// Initialize the data structure with empty arrays with enough space
161
157
// for each molecule.
158
+ size_t flat_grid_num_layers = max_loc.layer + 1 ;
162
159
size_t flat_grid_width = max_loc.x + 1 ;
163
160
size_t flat_grid_height = max_loc.y + 1 ;
164
161
appack_unrelated_clustering_data_ =
165
- vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 2 >({flat_grid_width,
162
+ vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 3 >({flat_grid_num_layers,
163
+ flat_grid_width,
166
164
flat_grid_height});
167
- for (size_t x = 0 ; x < flat_grid_width; x++) {
168
- for (size_t y = 0 ; y < flat_grid_height; y++) {
169
- // Resize to the maximum number of used external pins. This is
170
- // to ensure that every molecule below can be inserted into a
171
- // valid list based on their number of external pins.
172
- appack_unrelated_clustering_data_[x][y].resize (max_molecule_stats.num_used_ext_pins + 1 );
165
+ for (size_t layer_num = 0 ; layer_num < flat_grid_num_layers; layer_num++) {
166
+ for (size_t x = 0 ; x < flat_grid_width; x++) {
167
+ for (size_t y = 0 ; y < flat_grid_height; y++) {
168
+ // Resize to the maximum number of used external pins. This is
169
+ // to ensure that every molecule below can be inserted into a
170
+ // valid list based on their number of external pins.
171
+ appack_unrelated_clustering_data_[layer_num][x][y].resize (max_molecule_stats.num_used_ext_pins + 1 );
172
+ }
173
173
}
174
174
}
175
175
@@ -185,7 +185,7 @@ void GreedyCandidateSelector::initialize_unrelated_clustering_data(const t_molec
185
185
int ext_inps = molecule_stats.num_used_ext_inputs ;
186
186
187
187
// Insert the molecule into the unclustered lists by number of external inputs
188
- auto & tile_uc_data = appack_unrelated_clustering_data_[mol_pos.x ][mol_pos.y ];
188
+ auto & tile_uc_data = appack_unrelated_clustering_data_[mol_pos.layer ][mol_pos. x ][mol_pos.y ];
189
189
tile_uc_data[ext_inps].push_back (mol_id);
190
190
}
191
191
} else {
@@ -1258,21 +1258,33 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa
1258
1258
// to the max number of inputs a molecule could have.
1259
1259
size_t inputs_avail = cluster_legalizer.get_num_cluster_inputs_available (cluster_id);
1260
1260
VTR_ASSERT_SAFE (!appack_unrelated_clustering_data_.empty ());
1261
- size_t max_molecule_inputs_avail = appack_unrelated_clustering_data_[0 ][0 ].size () - 1 ;
1261
+ size_t max_molecule_inputs_avail = appack_unrelated_clustering_data_[0 ][0 ][0 ].size () - 1 ;
1262
+ size_t flat_grid_num_layers = appack_unrelated_clustering_data_.dim_size (0 );
1263
+ size_t flat_grid_width = appack_unrelated_clustering_data_.dim_size (1 );
1264
+ size_t flat_grid_height = appack_unrelated_clustering_data_.dim_size (2 );
1262
1265
if (inputs_avail >= max_molecule_inputs_avail) {
1263
1266
inputs_avail = max_molecule_inputs_avail;
1264
1267
}
1265
1268
1266
1269
// Create a queue of locations to search and a map of visited grid locations.
1267
1270
std::queue<t_physical_tile_loc> search_queue;
1268
- vtr::NdMatrix<bool , 2 > visited ({appack_unrelated_clustering_data_.dim_size (0 ),
1269
- appack_unrelated_clustering_data_.dim_size (1 )},
1270
- false );
1271
- // Push the position of the cluster to the queue.
1271
+ vtr::NdMatrix<bool , 3 > visited ({flat_grid_num_layers,
1272
+ flat_grid_width,
1273
+ flat_grid_height},
1274
+ false );
1275
+
1272
1276
t_physical_tile_loc cluster_tile_loc (cluster_gain_stats.flat_cluster_position .x ,
1273
1277
cluster_gain_stats.flat_cluster_position .y ,
1274
1278
cluster_gain_stats.flat_cluster_position .layer );
1275
- search_queue.push (cluster_tile_loc);
1279
+
1280
+ // Push the position of the cluster to the queue. We push this position on
1281
+ // each layer such that each layer is searched independently.
1282
+ for (size_t layer_num = 0 ; layer_num < flat_grid_num_layers; layer_num++) {
1283
+ t_physical_tile_loc tile_loc (cluster_tile_loc.x ,
1284
+ cluster_tile_loc.y ,
1285
+ layer_num);
1286
+ search_queue.push (tile_loc);
1287
+ }
1276
1288
1277
1289
// Get the max unrelated tile distance for the block type of this cluster.
1278
1290
t_logical_block_type_ptr cluster_type = cluster_legalizer.get_cluster_type (cluster_id);
@@ -1288,10 +1300,12 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa
1288
1300
while (!search_queue.empty ()) {
1289
1301
// Pop a position to search from the queue.
1290
1302
const t_physical_tile_loc& node_loc = search_queue.front ();
1291
- VTR_ASSERT_SAFE (node_loc.layer_num == 0 );
1292
1303
1293
1304
// Get the distance from the cluster to the current tile in tiles.
1294
- float dist = std::abs (node_loc.x - cluster_tile_loc.x ) + std::abs (node_loc.y - cluster_tile_loc.y );
1305
+ float node_dx = std::abs (node_loc.x - cluster_tile_loc.x );
1306
+ float node_dy = std::abs (node_loc.y - cluster_tile_loc.y );
1307
+ float node_dlayer = std::abs (node_loc.layer_num - cluster_tile_loc.layer_num );
1308
+ float dist = node_dx + node_dy + node_dlayer;
1295
1309
1296
1310
// If this position is too far from the source, skip it.
1297
1311
if (dist > max_dist) {
@@ -1309,18 +1323,18 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa
1309
1323
}
1310
1324
1311
1325
// If this position has been visited, skip it.
1312
- if (visited[node_loc.x ][node_loc.y ]) {
1326
+ if (visited[node_loc.layer_num ][node_loc. x ][node_loc.y ]) {
1313
1327
search_queue.pop ();
1314
1328
continue ;
1315
1329
}
1316
- visited[node_loc.x ][node_loc.y ] = true ;
1330
+ visited[node_loc.layer_num ][node_loc. x ][node_loc.y ] = true ;
1317
1331
1318
1332
// Explore this position from highest number of inputs available to lowest.
1319
1333
// Here, we are trying to find the closest compatible molecule, where we
1320
1334
// break ties based on whoever has more external inputs.
1321
1335
PackMoleculeId best_candidate = PackMoleculeId::INVALID ();
1322
1336
float best_candidate_distance = std::numeric_limits<float >::max ();
1323
- const auto & uc_data = appack_unrelated_clustering_data_[node_loc.x ][node_loc.y ];
1337
+ const auto & uc_data = appack_unrelated_clustering_data_[node_loc.layer_num ][node_loc. x ][node_loc.y ];
1324
1338
VTR_ASSERT_SAFE (inputs_avail < uc_data.size ());
1325
1339
for (int ext_inps = inputs_avail; ext_inps >= 0 ; ext_inps--) {
1326
1340
// Get the molecule by the number of external inputs.
0 commit comments