From 1993d170eadac1849b2d0340825ae0056fc891f3 Mon Sep 17 00:00:00 2001 From: Marco Feder Date: Thu, 29 Aug 2024 11:33:41 +0200 Subject: [PATCH 1/2] Weak scaling example --- examples/weak_scaling.cc | 928 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 928 insertions(+) create mode 100644 examples/weak_scaling.cc diff --git a/examples/weak_scaling.cc b/examples/weak_scaling.cc new file mode 100644 index 00000000..bb853756 --- /dev/null +++ b/examples/weak_scaling.cc @@ -0,0 +1,928 @@ +// ----------------------------------------------------------------------------- +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception OR LGPL-2.1-or-later +// Copyright (C) XXXX - YYYY by the deal.II authors +// +// This file is part of the deal.II library. +// +// Detailed license information governing the source code and contributions +// can be found in LICENSE.md and CONTRIBUTING.md at the top level directory. +// +// ----------------------------------------------------------------------------- + + +// Weak scaling instrumentation file +// + +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +using namespace dealii; + +static constexpr double factor = 0.1; + +enum class PartitionerType +{ + metis, + rtree +}; + +/** + * POD type which stores relevant information about the agglomeration procedure. + * In case of METIS, @p agglomeration_parameter is supposed to be the number of + * agglomerates (eventually local to each process). + * When employing the r-tree strategy, @p agglomeration_parameter is supposed + * to be the level of the hierarchy to be extracted. + */ +struct AgglomerationData +{ + PartitionerType partitioning_strategy; + unsigned int agglomeration_parameter; +}; + + +/** + * Utility to compute jump terms when the interface is locally owned, i.e. both + * elements are locally owned. + */ +template +void +assemble_local_jumps_and_averages(FullMatrix &M11, + FullMatrix &M12, + FullMatrix &M21, + FullMatrix &M22, + const FEValuesBase &fe_faces0, + const FEValuesBase &fe_faces1, + const double penalty_constant, + const double h_f) +{ + const std::vector> &normals = fe_faces0.get_normal_vectors(); + const unsigned int dofs_per_cell = + M11.m(); // size of local matrices equals the #DoFs + + for (unsigned int q_index : fe_faces0.quadrature_point_indices()) + { + const Tensor<1, dim> &normal = normals[q_index]; + for (unsigned int i = 0; i < dofs_per_cell; ++i) + { + for (unsigned int j = 0; j < dofs_per_cell; ++j) + { + M11(i, j) += + (-0.5 * fe_faces0.shape_grad(i, q_index) * normal * + fe_faces0.shape_value(j, q_index) - + 0.5 * fe_faces0.shape_grad(j, q_index) * normal * + fe_faces0.shape_value(i, q_index) + + (penalty_constant / h_f) * fe_faces0.shape_value(i, q_index) * + fe_faces0.shape_value(j, q_index)) * + fe_faces0.JxW(q_index); + + M12(i, j) += + (0.5 * fe_faces0.shape_grad(i, q_index) * normal * + fe_faces1.shape_value(j, q_index) - + 0.5 * fe_faces1.shape_grad(j, q_index) * normal * + fe_faces0.shape_value(i, q_index) - + (penalty_constant / h_f) * fe_faces0.shape_value(i, q_index) * + fe_faces1.shape_value(j, q_index)) * + fe_faces1.JxW(q_index); + + + M21(i, j) += + (-0.5 * fe_faces1.shape_grad(i, q_index) * normal * + fe_faces0.shape_value(j, q_index) + + 0.5 * fe_faces0.shape_grad(j, q_index) * normal * + fe_faces1.shape_value(i, q_index) - + (penalty_constant / h_f) * fe_faces1.shape_value(i, q_index) * + fe_faces0.shape_value(j, q_index)) * + fe_faces1.JxW(q_index); + + + M22(i, j) += + (0.5 * fe_faces1.shape_grad(i, q_index) * normal * + fe_faces1.shape_value(j, q_index) + + 0.5 * fe_faces1.shape_grad(j, q_index) * normal * + fe_faces1.shape_value(i, q_index) + + (penalty_constant / h_f) * fe_faces1.shape_value(i, q_index) * + fe_faces1.shape_value(j, q_index)) * + fe_faces1.JxW(q_index); + } + } + } +} + + + +/** + * Same as above, but for a ghosted neighbor. + */ +template +void +assemble_local_jumps_and_averages_ghost( + FullMatrix &M11, + FullMatrix &M12, + FullMatrix &M21, + FullMatrix &M22, + const FEValuesBase &fe_faces0, + const std::vector> &recv_values, + const std::vector>> &recv_gradients, + const std::vector &recv_jxws, + const double penalty_constant, + const double h_f) +{ + Assert( + (recv_values.size() > 0 && recv_gradients.size() && recv_jxws.size()), + ExcMessage( + "Not possible to assemble jumps and averages at a ghosted interface.")); + const unsigned int dofs_per_cell = M11.m(); + + const std::vector> &normals = fe_faces0.get_normal_vectors(); + for (unsigned int q_index : fe_faces0.quadrature_point_indices()) + { + const Tensor<1, dim> &normal = normals[q_index]; + for (unsigned int i = 0; i < dofs_per_cell; ++i) + { + for (unsigned int j = 0; j < dofs_per_cell; ++j) + { + M11(i, j) += + (-0.5 * fe_faces0.shape_grad(i, q_index) * normal * + fe_faces0.shape_value(j, q_index) - + 0.5 * fe_faces0.shape_grad(j, q_index) * normal * + fe_faces0.shape_value(i, q_index) + + (penalty_constant / h_f) * fe_faces0.shape_value(i, q_index) * + fe_faces0.shape_value(j, q_index)) * + fe_faces0.JxW(q_index); + + M12(i, j) += + (0.5 * fe_faces0.shape_grad(i, q_index) * normal * + recv_values[j][q_index] - + 0.5 * recv_gradients[j][q_index] * normal * + fe_faces0.shape_value(i, q_index) - + (penalty_constant / h_f) * fe_faces0.shape_value(i, q_index) * + recv_values[j][q_index]) * + recv_jxws[q_index]; + + + M21(i, j) += (-0.5 * recv_gradients[i][q_index] * normal * + fe_faces0.shape_value(j, q_index) + + 0.5 * fe_faces0.shape_grad(j, q_index) * normal * + recv_values[i][q_index] - + (penalty_constant / h_f) * recv_values[i][q_index] * + fe_faces0.shape_value(j, q_index)) * + recv_jxws[q_index]; + + + M22(i, j) += (0.5 * recv_gradients[i][q_index] * normal * + recv_values[j][q_index] + + 0.5 * recv_gradients[j][q_index] * normal * + recv_values[i][q_index] + + (penalty_constant / h_f) * recv_values[i][q_index] * + recv_values[j][q_index]) * + recv_jxws[q_index]; + } + } + } +} + + + +template +class RightHandSide : public Function +{ +public: + RightHandSide(const double c = 0.5) + : Function() + { + reaction_coefficient = c; + } + + virtual void + value_list(const std::vector> &points, + std::vector &values, + const unsigned int /*component*/ = 0) const override; + +private: + double reaction_coefficient; +}; + + +template +void +RightHandSide::value_list(const std::vector> &points, + std::vector &values, + const unsigned int /*component*/) const +{ + for (unsigned int i = 0; i < values.size(); ++i) + { + const double x = points[i][0]; + const double y = points[i][1]; + const double z = points[i][2]; + values[i] = values[i] = (3. * factor * factor) * numbers::PI * + numbers::PI * std::sin(factor * numbers::PI * x) * + std::sin(factor * numbers::PI * y) * + std::sin(factor * numbers::PI * z); + } +} + + + +template +class Solution : public Function +{ +public: + Solution() + : Function() + { + static_assert(dim == 3, "Only 3D case is implemented."); + } + + virtual double + value(const Point &p, const unsigned int component = 0) const override; + + virtual void + value_list(const std::vector> &points, + std::vector &values, + const unsigned int /*component*/) const override; + + virtual Tensor<1, dim> + gradient(const Point &p, + const unsigned int component = 0) const override; +}; + +template +double +Solution::value(const Point &p, const unsigned int) const +{ + return std::sin(factor * numbers::PI * p[0]) * + std::sin(factor * numbers::PI * p[1]) * + std::sin(factor * numbers::PI * p[2]); +} + +template +Tensor<1, dim> +Solution::gradient(const Point &p, const unsigned int) const +{ + Tensor<1, dim> grad; + grad[0] = factor * numbers::PI * std::cos(factor * numbers::PI * p[0]) * + std::sin(factor * numbers::PI * p[1]) * + std::sin(factor * numbers::PI * p[2]); + grad[1] = factor * numbers::PI * std::sin(factor * numbers::PI * p[0]) * + std::cos(factor * numbers::PI * p[1]) * + std::sin(factor * numbers::PI * p[2]); + grad[2] = factor * numbers::PI * std::sin(factor * numbers::PI * p[0]) * + std::sin(factor * numbers::PI * p[1]) * + std::cos(factor * numbers::PI * p[2]); + + return grad; +} + + +template +void +Solution::value_list(const std::vector> &points, + std::vector &values, + const unsigned int /*component*/) const +{ + for (unsigned int i = 0; i < values.size(); ++i) + values[i] = this->value(points[i]); +} + + + +/*--------------------------------------------------------------------------*/ +// Class describing the 3D problem. + +template +class WeakScalingTest +{ +public: + WeakScalingTest(const AgglomerationData &agglomeration_data, + const unsigned int degree, + const double reaction_coefficient, + const unsigned int n_refinements, + const MPI_Comm comm); + void + run(); + +private: + void + make_fine_grid(); + void + setup_agglomerated_problem(); + void + assemble_system(); + void + solve(); + + const MPI_Comm comm; + const unsigned int n_ranks; + FE_DGQ fe_dg; + AgglomerationData agglomeration_data; + const double reaction_coefficient; + parallel::fullydistributed::Triangulation tria_pft; + const unsigned int n_refs; + ConditionalOStream pcout; + double penalty_constant; + PartitionerType partitioner_type; + + + std::unique_ptr> ah; + AffineConstraints constraints; + + IndexSet locally_owned_dofs; + IndexSet locally_relevant_dofs; + + TrilinosWrappers::SparseMatrix system_matrix; + TrilinosWrappers::MPI::Vector system_rhs; + TrilinosWrappers::MPI::Vector locally_relevant_solution; + TrilinosWrappers::MPI::Vector interpolated_solution; + + Solution analytical_solution; +}; + + + +template +WeakScalingTest::WeakScalingTest( + const AgglomerationData &agglomeration_data, + const unsigned int degree, + const double reaction_coefficient, + const unsigned int n_refinements, + const MPI_Comm communicator) + : comm(communicator) + , n_ranks(Utilities::MPI::n_mpi_processes(comm)) + , fe_dg(degree) + , agglomeration_data(agglomeration_data) + , reaction_coefficient(reaction_coefficient) + , tria_pft(comm) + , n_refs(n_refinements) + , pcout(std::cout, (Utilities::MPI::this_mpi_process(comm) == 0)) +{ + static_assert(dim == 3); + penalty_constant = 10. * degree * degree; + pcout << "Running with " << n_ranks << " MPI ranks." << std::endl; + partitioner_type = PartitionerType::rtree; +} + + + +template +void +WeakScalingTest::make_fine_grid() +{ + parallel::distributed::Triangulation tria(comm); + GridGenerator::hyper_cube(tria); + tria.refine_global(n_refs); + + // Partition serial triangulation: + // GridTools::partition_triangulation(n_ranks, + // tria, + // SparsityTools::Partitioner::zoltan); + + // Create building blocks: + const TriangulationDescription::Description description = + TriangulationDescription::Utilities::create_description_from_triangulation( + tria, comm); + + tria_pft.create_triangulation(description); + pcout << "Total number of fine cells: " << tria_pft.n_global_active_cells() + << std::endl; +} + + + +template +void +WeakScalingTest::setup_agglomerated_problem() +{ + GridTools::Cache cached_tria(tria_pft); + + ah = std::make_unique>(cached_tria); + + // Agglomerate cells together based on their material id. + // The following vector stores cells with same material id, i.e.: + // v[material_id] = {cells with same id} + std::vector::active_cell_iterator>> + cells_per_material_id; + + if (agglomeration_data.partitioning_strategy == PartitionerType::metis) + { + cells_per_material_id.resize(agglomeration_data.agglomeration_parameter); + + // Call the METIS partitioner to agglomerate within each processor. + PolyUtils::partition_locally_owned_regions( + agglomeration_data.agglomeration_parameter, + tria_pft, + SparsityTools::Partitioner::metis); + } + else if (agglomeration_data.partitioning_strategy == PartitionerType::rtree) + { + // Partition with Rtree + MappingQ1 mapping; // use standard mapping + + namespace bgi = boost::geometry::index; + static constexpr unsigned int max_elem_per_node = + PolyUtils::constexpr_pow(2, dim); // 2^dim + std::vector, + typename Triangulation::active_cell_iterator>> + boxes(tria_pft.n_locally_owned_active_cells()); + unsigned int i = 0; + for (const auto &cell : tria_pft.active_cell_iterators()) + if (cell->is_locally_owned()) + boxes[i++] = std::make_pair(mapping.get_bounding_box(cell), cell); + + auto tree = pack_rtree>(boxes); + pcout << "Total number of available levels: " << n_levels(tree) + << std::endl; + + Assert(n_levels(tree) >= 2, + ExcMessage("At least two levels are needed.")); + + CellsAgglomerator agglomerator{ + tree, agglomeration_data.agglomeration_parameter}; + const auto agglomerates = agglomerator.extract_agglomerates(); + // ah->connect_hierarchy(agglomerator); + + // Flag elements for agglomeration + unsigned int agglo_index = 0; + for (unsigned int i = 0; i < agglomerates.size(); ++i) + { + const auto &agglo = agglomerates[i]; // i-th agglomerate + for (const auto &el : agglo) + { + el->set_material_id(agglo_index); + } + ++agglo_index; + } + + + cells_per_material_id.resize(agglo_index); + } + else + { + Assert(false, ExcNotImplemented()); + } + + // Irrespective of the partitioner strategy, define agglomerates based on the + // material_id(s) attached to each cell. + + for (const auto &cell : tria_pft.active_cell_iterators()) + if (cell->is_locally_owned()) + cells_per_material_id[cell->material_id()].push_back(cell); + + // Agglomerate elements with same id + for (std::size_t i = 0; i < cells_per_material_id.size(); ++i) + ah->define_agglomerate(cells_per_material_id[i]); + + pcout << "Defined all the agglomerates" << std::endl; + const unsigned int n_agglomerates = + agglomeration_data.partitioning_strategy == PartitionerType::rtree ? + Utilities::MPI::sum(cells_per_material_id.size(), comm) : + Utilities::MPI::n_mpi_processes(comm) * + agglomeration_data.agglomeration_parameter; + + pcout << "Total number of agglomerates: " << n_agglomerates << std::endl; +} + + + +template +void +WeakScalingTest::assemble_system() +{ + constraints.close(); + + const unsigned int quadrature_degree = fe_dg.get_degree() + 1; + const unsigned int face_quadrature_degree = fe_dg.get_degree() + 1; + ah->initialize_fe_values(QGauss(quadrature_degree), + update_values | update_gradients | + update_JxW_values | update_quadrature_points, + QGauss(face_quadrature_degree), + update_JxW_values); + + // DynamicSparsityPattern sparsity_pattern; + // double start_setup, stop_setup; + // start_setup = MPI_Wtime(); + ah->distribute_agglomerated_dofs(fe_dg); + // stop_setup = MPI_Wtime(); + // pcout << "Setup DoFs in: " << stop_setup - start_setup << "[s]." + // << std::endl; + + + TrilinosWrappers::SparsityPattern dsp; + double start_sparsity, stop_sparsity; + start_sparsity = MPI_Wtime(); + ah->create_agglomeration_sparsity_pattern(dsp); + MPI_Barrier(comm); + stop_sparsity = MPI_Wtime(); + + locally_owned_dofs = ah->agglo_dh.locally_owned_dofs(); + locally_relevant_dofs = DoFTools::extract_locally_relevant_dofs(ah->agglo_dh); + pcout << "N DoFs: " << locally_owned_dofs.size() << std::endl; + pcout << "Sparsity in: " << stop_sparsity - start_sparsity << "[s]." + << std::endl; + + + double start_reinit, stop_reinit; + start_reinit = MPI_Wtime(); + system_matrix.reinit(dsp); + MPI_Barrier(comm); + stop_reinit = MPI_Wtime(); + pcout << "Reinit matrix in: " << stop_reinit - start_reinit << "[s]." + << std::endl; + system_rhs.reinit(locally_owned_dofs, comm); + + std::unique_ptr> rhs_function; + rhs_function = + std::make_unique>(reaction_coefficient); + + + const unsigned int dofs_per_cell = fe_dg.n_dofs_per_cell(); + + FullMatrix cell_matrix(dofs_per_cell, dofs_per_cell); + Vector cell_rhs(dofs_per_cell); + + FullMatrix M11(dofs_per_cell, dofs_per_cell); + FullMatrix M12(dofs_per_cell, dofs_per_cell); + FullMatrix M21(dofs_per_cell, dofs_per_cell); + FullMatrix M22(dofs_per_cell, dofs_per_cell); + + std::vector local_dof_indices(dofs_per_cell); + std::vector local_dof_indices_neighbor( + dofs_per_cell); + + double start_assembly, stop_assembly; + start_assembly = MPI_Wtime(); + auto polytope = ah->begin(); + for (; polytope != ah->end(); ++polytope) + { + if (polytope->is_locally_owned()) + { + cell_matrix = 0.; + cell_rhs = 0.; + + const auto &agglo_values = ah->reinit(polytope); + + const auto &q_points = agglo_values.get_quadrature_points(); + const unsigned int n_qpoints = q_points.size(); + FullMatrix partial_matrix(dofs_per_cell, n_qpoints * dim); + std::vector rhs(n_qpoints); + rhs_function->value_list(q_points, rhs); + + + + for (unsigned int q_index : agglo_values.quadrature_point_indices()) + { + const double sqrtjxw = std::sqrt(agglo_values.JxW(q_index)); + for (unsigned int i = 0; i < dofs_per_cell; ++i) + { + const auto &grad = agglo_values.shape_grad(i, q_index); + for (unsigned int d = 0; d < dim; ++d) + { + partial_matrix(i, q_index * dim + d) = sqrtjxw * grad[d]; + } + cell_rhs(i) += agglo_values.shape_value(i, q_index) * + rhs[q_index] * agglo_values.JxW(q_index); + } + } + + // get volumetric DoFs + polytope->get_dof_indices(local_dof_indices); + + partial_matrix.mTmult(cell_matrix, partial_matrix); + + // Assemble face terms + unsigned int n_faces = polytope->n_faces(); + + const double h_f = polytope->diameter(); + for (unsigned int f = 0; f < n_faces; ++f) + { + if (polytope->at_boundary(f)) + { + // Get normal vectors seen from each agglomeration. + const auto &fe_face = ah->reinit(polytope, f); + const auto &normals = fe_face.get_normal_vectors(); + + const auto &face_q_points = fe_face.get_quadrature_points(); + + std::vector analytical_solution_values( + face_q_points.size()); + analytical_solution.value_list(face_q_points, + analytical_solution_values, + 1); + + for (unsigned int q_index : + fe_face.quadrature_point_indices()) + { + const Tensor<1, dim> &normal = normals[q_index]; + for (unsigned int i = 0; i < dofs_per_cell; ++i) + { + for (unsigned int j = 0; j < dofs_per_cell; ++j) + { + cell_matrix(i, j) += + (-fe_face.shape_value(i, q_index) * + fe_face.shape_grad(j, q_index) * normal - + fe_face.shape_grad(i, q_index) * normal * + fe_face.shape_value(j, q_index) + + (penalty_constant / h_f) * + fe_face.shape_value(i, q_index) * + fe_face.shape_value(j, q_index)) * + fe_face.JxW(q_index); + } + cell_rhs(i) += + ((penalty_constant / h_f) * + analytical_solution_values[q_index] * + fe_face.shape_value(i, q_index) - + fe_face.shape_grad(i, q_index) * normal * + analytical_solution_values[q_index]) * + fe_face.JxW(q_index); + } + } + } + else + { + const auto &neigh_polytope = polytope->neighbor(f); + if (polytope->id() < neigh_polytope->id()) + { + unsigned int nofn = + polytope->neighbor_of_agglomerated_neighbor(f); + + Assert(neigh_polytope->neighbor(nofn)->id() == + polytope->id(), + ExcMessage("Mismatch.")); + + const auto &fe_faces = + ah->reinit_interface(polytope, neigh_polytope, f, nofn); + const auto &fe_faces0 = fe_faces.first; + + if (neigh_polytope->is_locally_owned()) + { + // use both fevalues + const auto &fe_faces1 = fe_faces.second; + + M11 = 0.; + M12 = 0.; + M21 = 0.; + M22 = 0.; + + assemble_local_jumps_and_averages(M11, + M12, + M21, + M22, + fe_faces0, + fe_faces1, + penalty_constant, + h_f); + + // distribute DoFs accordingly + // fluxes + neigh_polytope->get_dof_indices( + local_dof_indices_neighbor); + + constraints.distribute_local_to_global( + M11, local_dof_indices, system_matrix); + constraints.distribute_local_to_global( + M12, + local_dof_indices, + local_dof_indices_neighbor, + system_matrix); + constraints.distribute_local_to_global( + M21, + local_dof_indices_neighbor, + local_dof_indices, + system_matrix); + constraints.distribute_local_to_global( + M22, local_dof_indices_neighbor, system_matrix); + } + else + { + // neigh polytope is ghosted, so retrieve necessary + // metadata. + + types::subdomain_id neigh_rank = + neigh_polytope->subdomain_id(); + + const auto &recv_jxws = + ah->recv_jxws.at(neigh_rank) + .at({neigh_polytope->id(), nofn}); + + const auto &recv_values = + ah->recv_values.at(neigh_rank) + .at({neigh_polytope->id(), nofn}); + + const auto &recv_gradients = + ah->recv_gradients.at(neigh_rank) + .at({neigh_polytope->id(), nofn}); + + M11 = 0.; + M12 = 0.; + M21 = 0.; + M22 = 0.; + + // there's no FEFaceValues on the other side (it's + // ghosted), so we just pass the actual data we have + // recevied from the neighboring ghosted polytope + assemble_local_jumps_and_averages_ghost( + M11, + M12, + M21, + M22, + fe_faces0, + recv_values, + recv_gradients, + recv_jxws, + penalty_constant, + h_f); + + + // distribute DoFs accordingly + // fluxes + neigh_polytope->get_dof_indices( + local_dof_indices_neighbor); + + constraints.distribute_local_to_global( + M11, local_dof_indices, system_matrix); + constraints.distribute_local_to_global( + M12, + local_dof_indices, + local_dof_indices_neighbor, + system_matrix); + constraints.distribute_local_to_global( + M21, + local_dof_indices_neighbor, + local_dof_indices, + system_matrix); + constraints.distribute_local_to_global( + M22, local_dof_indices_neighbor, system_matrix); + } // ghosted polytope case + + + } // only once + } // internal face + } // face loop + + constraints.distribute_local_to_global(cell_matrix, + cell_rhs, + local_dof_indices, + system_matrix, + system_rhs); + + } // locally owned polytopes + } + + system_matrix.compress(VectorOperation::add); + system_rhs.compress(VectorOperation::add); + MPI_Barrier(comm); + stop_assembly = MPI_Wtime(); + pcout << "Assembled in: " << stop_assembly - start_assembly << "[s]." + << std::endl; +} + + + +template +void +WeakScalingTest::solve() +{ + locally_relevant_solution.reinit(locally_owned_dofs, + locally_relevant_dofs, + comm); + + pcout << "Start solver" << std::endl; + + SolverControl control(800, 1e-8, false, false); + SolverCG solver(control); + TrilinosWrappers::PreconditionILU precondition; + precondition.initialize(system_matrix); + // SolverControl solver_control(1); + // TrilinosWrappers::SolverDirect solver(solver_control); + pcout << "Initialized preconditioner" << std::endl; + + TrilinosWrappers::MPI::Vector completely_distributed_solution(system_rhs); + double start_solver, stop_solver; + start_solver = MPI_Wtime(); + solver.solve(system_matrix, + completely_distributed_solution, + system_rhs, + precondition); + MPI_Barrier(comm); + stop_solver = MPI_Wtime(); + + pcout << "Linear system solved in: " << stop_solver - start_solver << "[s]." + << std::endl; + pcout << "Number of outer iterations: " << control.last_step() << std::endl; + + constraints.distribute(completely_distributed_solution); + locally_relevant_solution = completely_distributed_solution; + +#ifdef FALSE + PolyUtils::interpolate_to_fine_grid(*ah, + interpolated_solution, + completely_distributed_solution); + + Vector cellwise_error(completely_distributed_solution.size()); + VectorTools::integrate_difference(ah->output_dh, + interpolated_solution, + Solution(), + cellwise_error, + QGauss(2 * fe_dg.degree + 1), + VectorTools::NormType::L2_norm); + const double error = + VectorTools::compute_global_error(tria_pft, + cellwise_error, + VectorTools::NormType::L2_norm); + + VectorTools::integrate_difference(ah->output_dh, + interpolated_solution, + Solution(), + cellwise_error, + QGauss(2 * fe_dg.degree + 1), + VectorTools::NormType::H1_seminorm); + const double semiH1error = + VectorTools::compute_global_error(tria_pft, + cellwise_error, + VectorTools::NormType::H1_seminorm); + + pcout << "L2 error: " << error << std::endl; + pcout << "Semi H1 error: " << semiH1error << std::endl; +#endif +} + + + +template +void +WeakScalingTest::run() +{ + make_fine_grid(); + setup_agglomerated_problem(); + assemble_system(); + solve(); +} + + + +int +main(int argc, char *argv[]) +{ + Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1); + const MPI_Comm comm = MPI_COMM_WORLD; + static constexpr unsigned int dim = 3; + + // Setup agglomeration data for METIS + const unsigned int n_cycles = 8; + double n_agglo = 500; + std::vector metis_datas; + AgglomerationData data; + data.partitioning_strategy = PartitionerType::metis; + data.agglomeration_parameter = static_cast( + n_agglo / Utilities::MPI::n_mpi_processes(comm)); // agglomerate per process + metis_datas.push_back(data); + + for (unsigned int cycle = 1; cycle < n_cycles; ++cycle) + { + n_agglo *= 8; + AgglomerationData data; + data.partitioning_strategy = PartitionerType::metis; + data.agglomeration_parameter = static_cast( + n_agglo / Utilities::MPI::n_mpi_processes(comm)); + metis_datas.push_back(data); + } + + const unsigned int reaction_coefficient = 0.; + const unsigned int degree = 2; + std::vector n_global_refinements; + for (unsigned int cycle = 0; cycle < n_cycles; ++cycle) + { + n_global_refinements.push_back(4 + cycle); + if (Utilities::MPI::this_mpi_process(comm) == 0) + std::cout << "N_refinements: " << n_global_refinements[cycle] + << std::endl; + WeakScalingTest problem(metis_datas[cycle], + degree, + reaction_coefficient, + n_global_refinements[cycle], + comm); + problem.run(); + } +} \ No newline at end of file From 91dcbb04201c13941f9be50deaa46ef823998afe Mon Sep 17 00:00:00 2001 From: Marco Feder Date: Tue, 3 Sep 2024 14:36:52 +0200 Subject: [PATCH 2/2] Update weak scaling application --- examples/weak_scaling.cc | 127 +++++++++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 45 deletions(-) diff --git a/examples/weak_scaling.cc b/examples/weak_scaling.cc index bb853756..d5dc4691 100644 --- a/examples/weak_scaling.cc +++ b/examples/weak_scaling.cc @@ -63,6 +63,34 @@ struct AgglomerationData }; +struct Timers +{ + double reinit; + double sparsity_pattern; + double distribute_dofs; + double assembly; + double solve; + void + print_summary(const MPI_Comm &communicator) const + { + if (Utilities::MPI::this_mpi_process(communicator) == 0) + { + std::cout << "- - - - - - - - - - - Summary - - - - - - - - - - -" + << std::endl; + std::cout << std::left << std::setw(24) << std::scientific + << "Reinit: " << std::setw(6) << reinit << " [s]" + << std::endl; + std::cout << std::left << std::setw(24) << std::scientific + << "Sparsity: " << sparsity_pattern << " [s]" << std::endl; + std::cout << std::left << std::setw(24) << std::scientific + << "Assembly: " << assembly << " [s]" << std::endl; + std::cout << std::left << std::setw(24) << std::scientific + << "Solve: " << solve << " [s]" << std::endl; + } + } +}; + + /** * Utility to compute jump terms when the interface is locally owned, i.e. both * elements are locally owned. @@ -358,6 +386,8 @@ class WeakScalingTest TrilinosWrappers::MPI::Vector interpolated_solution; Solution analytical_solution; + + Timers times; }; @@ -520,14 +550,7 @@ WeakScalingTest::assemble_system() QGauss(face_quadrature_degree), update_JxW_values); - // DynamicSparsityPattern sparsity_pattern; - // double start_setup, stop_setup; - // start_setup = MPI_Wtime(); ah->distribute_agglomerated_dofs(fe_dg); - // stop_setup = MPI_Wtime(); - // pcout << "Setup DoFs in: " << stop_setup - start_setup << "[s]." - // << std::endl; - TrilinosWrappers::SparsityPattern dsp; double start_sparsity, stop_sparsity; @@ -539,8 +562,9 @@ WeakScalingTest::assemble_system() locally_owned_dofs = ah->agglo_dh.locally_owned_dofs(); locally_relevant_dofs = DoFTools::extract_locally_relevant_dofs(ah->agglo_dh); pcout << "N DoFs: " << locally_owned_dofs.size() << std::endl; - pcout << "Sparsity in: " << stop_sparsity - start_sparsity << "[s]." - << std::endl; + pcout << "Creation of SparsityPattern in: " << stop_sparsity - start_sparsity + << "[s]." << std::endl; + times.sparsity_pattern = stop_sparsity - start_sparsity; double start_reinit, stop_reinit; @@ -550,6 +574,7 @@ WeakScalingTest::assemble_system() stop_reinit = MPI_Wtime(); pcout << "Reinit matrix in: " << stop_reinit - start_reinit << "[s]." << std::endl; + times.reinit = stop_reinit - start_reinit; system_rhs.reinit(locally_owned_dofs, comm); std::unique_ptr> rhs_function; @@ -796,6 +821,7 @@ WeakScalingTest::assemble_system() stop_assembly = MPI_Wtime(); pcout << "Assembled in: " << stop_assembly - start_assembly << "[s]." << std::endl; + times.assembly = stop_assembly - start_assembly; } @@ -831,6 +857,7 @@ WeakScalingTest::solve() pcout << "Linear system solved in: " << stop_solver - start_solver << "[s]." << std::endl; pcout << "Number of outer iterations: " << control.last_step() << std::endl; + times.solve = stop_solver - start_solver; constraints.distribute(completely_distributed_solution); locally_relevant_solution = completely_distributed_solution; @@ -878,6 +905,7 @@ WeakScalingTest::run() setup_agglomerated_problem(); assemble_system(); solve(); + times.print_summary(comm); } @@ -885,44 +913,53 @@ WeakScalingTest::run() int main(int argc, char *argv[]) { - Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1); - const MPI_Comm comm = MPI_COMM_WORLD; - static constexpr unsigned int dim = 3; - - // Setup agglomeration data for METIS - const unsigned int n_cycles = 8; - double n_agglo = 500; - std::vector metis_datas; - AgglomerationData data; - data.partitioning_strategy = PartitionerType::metis; - data.agglomeration_parameter = static_cast( - n_agglo / Utilities::MPI::n_mpi_processes(comm)); // agglomerate per process - metis_datas.push_back(data); - - for (unsigned int cycle = 1; cycle < n_cycles; ++cycle) + try { - n_agglo *= 8; - AgglomerationData data; - data.partitioning_strategy = PartitionerType::metis; - data.agglomeration_parameter = static_cast( - n_agglo / Utilities::MPI::n_mpi_processes(comm)); - metis_datas.push_back(data); - } - - const unsigned int reaction_coefficient = 0.; - const unsigned int degree = 2; - std::vector n_global_refinements; - for (unsigned int cycle = 0; cycle < n_cycles; ++cycle) - { - n_global_refinements.push_back(4 + cycle); + Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1); + const MPI_Comm comm = MPI_COMM_WORLD; + static constexpr unsigned int dim = 3; + AssertThrow(argc == 3, + ExcMessage("Wrong number of command line arguments.")); + + // Setup agglomeration data + const unsigned int level = std::atoi(argv[1]); // 2 + AgglomerationData data; + data.partitioning_strategy = PartitionerType::rtree; + data.agglomeration_parameter = level; + + const unsigned int reaction_coefficient = 0.; + const unsigned int degree = 1; + const unsigned int n_global_refinements = 5 + std::atoi(argv[2]); if (Utilities::MPI::this_mpi_process(comm) == 0) - std::cout << "N_refinements: " << n_global_refinements[cycle] - << std::endl; - WeakScalingTest problem(metis_datas[cycle], - degree, - reaction_coefficient, - n_global_refinements[cycle], - comm); + std::cout << "N_refinements: " << n_global_refinements << std::endl; + WeakScalingTest problem( + data, degree, reaction_coefficient, n_global_refinements, comm); problem.run(); } + catch (std::exception &exc) + { + std::cerr << std::endl + << std::endl + << "----------------------------------------------------" + << std::endl; + std::cerr << "Exception on processing: " << std::endl + << exc.what() << std::endl + << "Aborting!" << std::endl + << "----------------------------------------------------" + << std::endl; + return 1; + } + catch (...) + { + std::cerr << std::endl + << std::endl + << "----------------------------------------------------" + << std::endl; + std::cerr << "Unknown exception!" << std::endl + << "Aborting!" << std::endl + << "----------------------------------------------------" + << std::endl; + return 1; + } + return 0; } \ No newline at end of file