From c4c243e5eb0bcbfbe5dcb0872a7b7135b3b373e9 Mon Sep 17 00:00:00 2001 From: Ben Krohn-Hansen Date: Thu, 1 May 2025 16:55:59 -0700 Subject: [PATCH 01/28] Commit --- distributed_pcg.cpp | 67 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index 36dd08c..ddefc50 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -40,6 +40,51 @@ class Matrix{ return b; } }; + +class CSRMatrix { + public: + int nbrow, nbcol; + std::vector values; + std::vector col_indices; + std::vector row_indices; + + // Constructor: Build a 1D Poisson matrix with Dirichlet BCs + CSRMatrix(int nrows, int ncols) : nbrow(nrows), nbcol(ncols) { + row_indices.resize(nbrow + 1); + for (int i = 0; i < nbrow; ++i) { + row_indices[i] = values.size(); + + if (i > 0) { + values.push_back(-1.0); + col_indices.push_back(i - 1); + } + + values.push_back(2.0); + col_indices.push_back(i); + + if (i + 1 < nbcol) { + values.push_back(-1.0); + col_indices.push_back(i + 1); + } + } + row_indices[nbrow] = values.size(); + } + + // Matrix-vector multiplication y = A * x + std::vector operator*(const std::vector& x) const { + assert(x.size() == nbcol); + std::vector y(nbrow, 0.0); + for (int i = 0; i < nbrow; ++i) { + for (int j = row_indices[i]; j < row_indices[i + 1]; ++j) { + y[i] += values[j] * x[col_indices[j]]; + } + } + return y; + } + + int NbRow() const { return nbrow; } + int NbCol() const { return nbcol; } +}; // scalar product (u, v) double operator,(const std::vector& u, const std::vector& v){ @@ -88,6 +133,8 @@ std::vector prec(const Eigen::SimplicialCholesky& b, std::vector& x, doub coefficients.push_back(Eigen::Triplet(j, k, it -> second)); } + std::vector> CSR_coefficients; + for (int row = 0; row < A.NbRow(); ++row) { + for (int idx = A.row_indeces[row]; idx < A.row_indeces[row + 1]; ++idx) { + int col = A.col_indices[idx]; + double val = A.values[idx]; + CSR_coefficients.push_back(Eigen::Triplet(row, col, val)); + } + } + + std::cout << "Triplets from A.data (original map-based):\n"; + for (const auto& t : coefficients) { + std::cout << "(" << t.row() << ", " << t.col() << ") = " << t.value() << "\n"; + } + + std::cout << "\nTriplets from CSR matrix:\n"; + for (const auto& t : CSR_coefficients) { + std::cout << "(" << t.row() << ", " << t.col() << ") = " << t.value() << "\n"; + } + // compute the Cholesky factorization of the diagonal block for the preconditioner Eigen::SparseMatrix B(n, n); B.setFromTriplets(coefficients.begin(), coefficients.end()); From 709a2e5dd893e0e2e57ad6f3af2e9edb28835275 Mon Sep 17 00:00:00 2001 From: Ben Krohn-Hansen Date: Thu, 1 May 2025 16:57:44 -0700 Subject: [PATCH 02/28] Commit --- distributed_pcg.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index ddefc50..1e4e1ed 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -169,10 +169,10 @@ void CG_Solver::solve(const std::vector& b, std::vector& x, doub } std::vector> CSR_coefficients; - for (int row = 0; row < A.NbRow(); ++row) { - for (int idx = A.row_indeces[row]; idx < A.row_indeces[row + 1]; ++idx) { - int col = A.col_indices[idx]; - double val = A.values[idx]; + for (int row = 0; row < CSR_A.NbRow(); ++row) { + for (int idx = CSR_A.row_indices[row]; idx < CSR_A.row_indices[row + 1]; ++idx) { + int col = CSR_A.col_indices[idx]; + double val = CSR_A.values[idx]; CSR_coefficients.push_back(Eigen::Triplet(row, col, val)); } } From 8762a5ad87b092cb80b84a0fdd9ac6865115ccb4 Mon Sep 17 00:00:00 2001 From: Ben Krohn-Hansen Date: Thu, 1 May 2025 20:17:55 -0700 Subject: [PATCH 03/28] Commit --- distributed_pcg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index 1e4e1ed..7099bbd 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -49,7 +49,7 @@ class CSRMatrix { std::vector row_indices; // Constructor: Build a 1D Poisson matrix with Dirichlet BCs - CSRMatrix(int nrows, int ncols) : nbrow(nrows), nbcol(ncols) { + CSRMatrix(const int& nrows=0, const int& ncols=0) : nbrow(nrows), nbcol(ncols) { row_indices.resize(nbrow + 1); for (int i = 0; i < nbrow; ++i) { row_indices[i] = values.size(); From 918522b5b680f99df11c97d03b6c375ec5873ad1 Mon Sep 17 00:00:00 2001 From: Ben Krohn-Hansen Date: Thu, 1 May 2025 20:27:44 -0700 Subject: [PATCH 04/28] Commit --- distributed_pcg.cpp | 60 +++++++-------------------------------------- 1 file changed, 9 insertions(+), 51 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index 7099bbd..e08da9a 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -8,39 +8,6 @@ #include -class Matrix{ - public: - typedef std::pair N2; - - std::map data; - int nbrow; - int nbcol; - - Matrix(const int& nr = 0, const int& nc = 0): nbrow(nr), nbcol(nc) { - for (int i = 0; i < nc; ++i) { - data[std::make_pair(i, i)] = 3.0; - if (i - 1 >= 0) data[std::make_pair(i, i - 1)] = -1.0; - if (i + 1 < nc) data[std::make_pair(i, i + 1)] = -1.0; - } - }; - - int NbRow() const {return nbrow;} - int NbCol() const {return nbcol;} - - // matrix-vector product with vector xi - std::vector operator*(const std::vector& xi) const { - std::vector b(NbRow(), 0.); - for(auto it = data.begin(); it != data.end(); ++it){ - int j = (it->first).first; - int k = (it->first).second; - double Mjk = it->second; - b[j] += Mjk * xi[k]; - } - - return b; - } -}; - class CSRMatrix { public: int nbrow, nbcol; @@ -132,7 +99,6 @@ std::vector prec(const Eigen::SimplicialCholesky& b, std::vector& x, doub int n = A.NbCol(); // get the local diagonal block of A - std::vector> coefficients; - for(auto it = A.data.begin(); it != A.data.end(); ++it){ - int j = (it->first).first; - int k = (it->first).second; - coefficients.push_back(Eigen::Triplet(j, k, it -> second)); - } - std::vector> CSR_coefficients; for (int row = 0; row < CSR_A.NbRow(); ++row) { for (int idx = CSR_A.row_indices[row]; idx < CSR_A.row_indices[row + 1]; ++idx) { @@ -177,19 +135,19 @@ void CG_Solver::solve(const std::vector& b, std::vector& x, doub } } - std::cout << "Triplets from A.data (original map-based):\n"; - for (const auto& t : coefficients) { - std::cout << "(" << t.row() << ", " << t.col() << ") = " << t.value() << "\n"; - } + // ========================================== + // UNCOMMENT TO PRINT CHECK COEFFICIENTS + // ========================================== - std::cout << "\nTriplets from CSR matrix:\n"; - for (const auto& t : CSR_coefficients) { - std::cout << "(" << t.row() << ", " << t.col() << ") = " << t.value() << "\n"; - } + // std::cout << "\nTriplets from CSR matrix:\n"; + // for (const auto& t : CSR_coefficients) { + // std::cout << "(" << t.row() << ", " << t.col() << ") = " << t.value() << "\n"; + // } + // compute the Cholesky factorization of the diagonal block for the preconditioner Eigen::SparseMatrix B(n, n); - B.setFromTriplets(coefficients.begin(), coefficients.end()); + B.setFromTriplets(CSR_coefficients.begin(), CSR_coefficients.end()); Eigen::SimplicialCholesky> P(B); const double epsilon = tol * std::sqrt((b, b)); From 38cd7590dc5de8c686b90ec5a8d0ddd0c9fae20c Mon Sep 17 00:00:00 2001 From: Ben Krohn-Hansen Date: Thu, 1 May 2025 20:31:22 -0700 Subject: [PATCH 05/28] Commit --- distributed_pcg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index e08da9a..cf7ab83 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -123,7 +123,7 @@ void CG_Solver::solve(const std::vector& b, std::vector& x, doub int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Get the rank of the process - int n = A.NbCol(); + int n = CSR_A.NbCol(); // get the local diagonal block of A std::vector> CSR_coefficients; From ae7ea894f2ce2fe8e556fc6651f0ea83ed93f323 Mon Sep 17 00:00:00 2001 From: Ben Krohn-Hansen Date: Thu, 1 May 2025 20:32:22 -0700 Subject: [PATCH 06/28] Commit --- distributed_pcg.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index cf7ab83..c96201b 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -99,7 +99,7 @@ std::vector prec(const Eigen::SimplicialCholesky& b, std::vector& x, doub int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Get the rank of the process - int n = CSR_A.NbCol(); + int n = A.NbCol(); // get the local diagonal block of A - std::vector> CSR_coefficients; - for (int row = 0; row < CSR_A.NbRow(); ++row) { - for (int idx = CSR_A.row_indices[row]; idx < CSR_A.row_indices[row + 1]; ++idx) { - int col = CSR_A.col_indices[idx]; - double val = CSR_A.values[idx]; - CSR_coefficients.push_back(Eigen::Triplet(row, col, val)); + std::vector> coefficients; + for (int row = 0; row < A.NbRow(); ++row) { + for (int idx = A.row_indices[row]; idx < A.row_indices[row + 1]; ++idx) { + int col = A.col_indices[idx]; + double val = A.values[idx]; + coefficients.push_back(Eigen::Triplet(row, col, val)); } } @@ -140,14 +140,14 @@ void CG_Solver::solve(const std::vector& b, std::vector& x, doub // ========================================== // std::cout << "\nTriplets from CSR matrix:\n"; - // for (const auto& t : CSR_coefficients) { + // for (const auto& t : coefficients) { // std::cout << "(" << t.row() << ", " << t.col() << ") = " << t.value() << "\n"; // } // compute the Cholesky factorization of the diagonal block for the preconditioner Eigen::SparseMatrix B(n, n); - B.setFromTriplets(CSR_coefficients.begin(), CSR_coefficients.end()); + B.setFromTriplets(coefficients.begin(), coefficients.end()); Eigen::SimplicialCholesky> P(B); const double epsilon = tol * std::sqrt((b, b)); From 36633fe34e8e1cd6c792b7bc9d31f5db6a7452a2 Mon Sep 17 00:00:00 2001 From: Ben Krohn-Hansen Date: Thu, 1 May 2025 20:37:40 -0700 Subject: [PATCH 07/28] Commit --- distributed_pcg.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index c96201b..8c6c65f 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -139,10 +139,10 @@ void CG_Solver::solve(const std::vector& b, std::vector& x, doub // UNCOMMENT TO PRINT CHECK COEFFICIENTS // ========================================== - // std::cout << "\nTriplets from CSR matrix:\n"; - // for (const auto& t : coefficients) { - // std::cout << "(" << t.row() << ", " << t.col() << ") = " << t.value() << "\n"; - // } + std::cout << "\nTriplets from CSR matrix:\n"; + for (const auto& t : coefficients) { + std::cout << "(" << t.row() << ", " << t.col() << ") = " << t.value() << "\n"; + } // compute the Cholesky factorization of the diagonal block for the preconditioner From c85c9dc47ee080910262677a147bf9acf6d19659 Mon Sep 17 00:00:00 2001 From: Ben Krohn-Hansen Date: Thu, 1 May 2025 20:40:36 -0700 Subject: [PATCH 08/28] Commit --- distributed_pcg.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index 8c6c65f..e66bcc7 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -157,6 +157,17 @@ void CG_Solver::solve(const std::vector& b, std::vector& x, doub double res = std::sqrt((r, r)); int num_it = 0; + + if (rank == 0) { + std::vector Ap = A * p; + std::cout << "A * p = ["; + for (size_t i = 0; i < Ap.size(); ++i) { + std::cout << Ap[i]; + if (i < Ap.size() - 1) std::cout << ", "; + } + std::cout << "]\n"; + } + while(res >= epsilon) { alpha = (r, z) / (p, A * p); From af16de02d7b7f0347361c9c1307f87cd8aad4ebf Mon Sep 17 00:00:00 2001 From: Ben Krohn-Hansen Date: Thu, 1 May 2025 20:42:01 -0700 Subject: [PATCH 09/28] Commit --- distributed_pcg.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index e66bcc7..38a5033 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -158,15 +158,13 @@ void CG_Solver::solve(const std::vector& b, std::vector& x, doub int num_it = 0; - if (rank == 0) { - std::vector Ap = A * p; - std::cout << "A * p = ["; - for (size_t i = 0; i < Ap.size(); ++i) { - std::cout << Ap[i]; - if (i < Ap.size() - 1) std::cout << ", "; - } - std::cout << "]\n"; + std::vector Ap = A * p; + std::cout << "A * p = ["; + for (size_t i = 0; i < Ap.size(); ++i) { + std::cout << Ap[i]; + if (i < Ap.size() - 1) std::cout << ", "; } + std::cout << "]\n"; while(res >= epsilon) { From 9e598f8b406e2da9494fd18dbdad3abc9915f440 Mon Sep 17 00:00:00 2001 From: Ben Krohn-Hansen Date: Thu, 1 May 2025 20:45:23 -0700 Subject: [PATCH 10/28] Commit --- distributed_pcg.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index 38a5033..337b00e 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -139,10 +139,10 @@ void CG_Solver::solve(const std::vector& b, std::vector& x, doub // UNCOMMENT TO PRINT CHECK COEFFICIENTS // ========================================== - std::cout << "\nTriplets from CSR matrix:\n"; - for (const auto& t : coefficients) { - std::cout << "(" << t.row() << ", " << t.col() << ") = " << t.value() << "\n"; - } + // std::cout << "\nTriplets from CSR matrix:\n"; + // for (const auto& t : coefficients) { + // std::cout << "(" << t.row() << ", " << t.col() << ") = " << t.value() << "\n"; + // } // compute the Cholesky factorization of the diagonal block for the preconditioner @@ -158,13 +158,13 @@ void CG_Solver::solve(const std::vector& b, std::vector& x, doub int num_it = 0; - std::vector Ap = A * p; - std::cout << "A * p = ["; - for (size_t i = 0; i < Ap.size(); ++i) { - std::cout << Ap[i]; - if (i < Ap.size() - 1) std::cout << ", "; - } - std::cout << "]\n"; + // std::vector Ap = A * p; + // std::cout << "A * p = ["; + // for (size_t i = 0; i < Ap.size(); ++i) { + // std::cout << Ap[i]; + // if (i < Ap.size() - 1) std::cout << ", "; + // } + // std::cout << "]\n"; while(res >= epsilon) { From 894cc58f3498f32f88224ae0b5ccee90938e5745 Mon Sep 17 00:00:00 2001 From: Ben Krohn-Hansen Date: Thu, 1 May 2025 20:58:00 -0700 Subject: [PATCH 11/28] Commit --- .vscode/settings.json | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..e2c18d1 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "ostream": "cpp" + } +} \ No newline at end of file From e862ae69ece583b8eb5e3ff522a252be8824b5b7 Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 13:38:27 -0700 Subject: [PATCH 12/28] 1D row partitioning --- distributed_pcg.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index 337b00e..595da30 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -112,7 +112,22 @@ CSRMatrix A; * Note that the starter code only works for 1 rank and it is not efficient. */ CG_Solver::CG_Solver(const int& n, const int& N) { - A = CSRMatrix(n, N); + // 1) Discover the MPI rank and the total number of ranks + int rank, size; + MPI_Comm_rank(CMPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + // 2) Compute how many rows we own + int n_local = n; + + // 3) Which global rows are they + int row_start = rank * n_local; + int row_end = row_start + n_local -1; + + std::cout << "Rank" << rank << " owns rows " << row_start << "-" << row_end << "\n"; + + // 4) Pass those into the CSR builder so it only builds that slice + A = CSRMatrix(n_local, N, row_start); } /* The preconditioned conjugate gradient method solving Ax = b with tolerance tol. @@ -182,4 +197,4 @@ void CG_Solver::solve(const std::vector& b, std::vector& x, doub std::cout << "residual: " << res << "\n"; } } - } \ No newline at end of file + } From 4d0bde72936ec8540470c137202706755244c946 Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 13:40:48 -0700 Subject: [PATCH 13/28] Update distributed_pcg.cpp --- distributed_pcg.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index 595da30..61295e0 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -114,7 +114,7 @@ CSRMatrix A; CG_Solver::CG_Solver(const int& n, const int& N) { // 1) Discover the MPI rank and the total number of ranks int rank, size; - MPI_Comm_rank(CMPI_COMM_WORLD, &rank); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); // 2) Compute how many rows we own @@ -127,7 +127,8 @@ CG_Solver::CG_Solver(const int& n, const int& N) { std::cout << "Rank" << rank << " owns rows " << row_start << "-" << row_end << "\n"; // 4) Pass those into the CSR builder so it only builds that slice - A = CSRMatrix(n_local, N, row_start); + CSRMatrix fullA(N, N); + A = fullA.submatrix(row_start, n_local); } /* The preconditioned conjugate gradient method solving Ax = b with tolerance tol. From 8ea1e77e49ab179c4f70076137c3f2da50f1071a Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 13:49:00 -0700 Subject: [PATCH 14/28] Update distributed_pcg.cpp --- distributed_pcg.cpp | 108 ++++++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 44 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index 61295e0..9d95452 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -9,48 +9,69 @@ #include class CSRMatrix { - public: - int nbrow, nbcol; - std::vector values; - std::vector col_indices; - std::vector row_indices; - - // Constructor: Build a 1D Poisson matrix with Dirichlet BCs - CSRMatrix(const int& nrows=0, const int& ncols=0) : nbrow(nrows), nbcol(ncols) { - row_indices.resize(nbrow + 1); - for (int i = 0; i < nbrow; ++i) { - row_indices[i] = values.size(); - - if (i > 0) { - values.push_back(-1.0); - col_indices.push_back(i - 1); - } - - values.push_back(2.0); - col_indices.push_back(i); - - if (i + 1 < nbcol) { - values.push_back(-1.0); - col_indices.push_back(i + 1); - } +public: + int nbrow, nbcol; + std::vector values; + std::vector col_indices; + std::vector row_indices; + + // 1) Full-matrix constructor (optional; you can keep or remove this) + CSRMatrix(const int& nrows=0, const int& ncols=0) + : nbrow(nrows), nbcol(ncols) + { + row_indices.resize(nbrow + 1); + for(int i=0; i 0) { + values.push_back(-1.0); + col_indices.push_back(i-1); + } + values.push_back(2.0); + col_indices.push_back(i); + if(i+1 < nbcol) { + values.push_back(-1.0); + col_indices.push_back(i+1); + } + } + row_indices[nbrow] = values.size(); + } + + // 2) Slice constructor: only build rows [row_start … row_start+nbrow-1] + CSRMatrix(const int& nrows, const int& ncols, const int& row_start) + : nbrow(nrows), nbcol(ncols) + { + row_indices.resize(nbrow + 1); + for(int i_local = 0; i_local < nbrow; ++i_local) { + int ig = row_start + i_local; // global row index + row_indices[i_local] = values.size(); + if (ig > 0) { + values.push_back(-1.0); + col_indices.push_back((ig - 1) - row_start); } - row_indices[nbrow] = values.size(); - } - - // Matrix-vector multiplication y = A * x - std::vector operator*(const std::vector& x) const { - assert(x.size() == nbcol); - std::vector y(nbrow, 0.0); - for (int i = 0; i < nbrow; ++i) { - for (int j = row_indices[i]; j < row_indices[i + 1]; ++j) { - y[i] += values[j] * x[col_indices[j]]; - } + values.push_back(2.0); + col_indices.push_back(ig - row_start); + if (ig + 1 < nbcol) { + values.push_back(-1.0); + col_indices.push_back((ig + 1) - row_start); } - return y; - } - - int NbRow() const { return nbrow; } - int NbCol() const { return nbcol; } + } + row_indices[nbrow] = values.size(); + } + + // Matrix–vector multiply (local rows only) + std::vector operator*(const std::vector& x) const { + assert(x.size() == (size_t)nbcol); + std::vector y(nbrow, 0.0); + for(int i=0; i& b, std::vector& x, doub int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Get the rank of the process - int n = A.NbCol(); + int n_local = A.NbRow(); // get the local diagonal block of A std::vector> coefficients; @@ -162,7 +182,7 @@ void CG_Solver::solve(const std::vector& b, std::vector& x, doub // compute the Cholesky factorization of the diagonal block for the preconditioner - Eigen::SparseMatrix B(n, n); + Eigen::SparseMatrix B(n_local, n_local); B.setFromTriplets(coefficients.begin(), coefficients.end()); Eigen::SimplicialCholesky> P(B); From e41193de7b18bd9eb4033d40c096abee827c9eff Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 13:57:53 -0700 Subject: [PATCH 15/28] Create testing.cpp --- testing.cpp | 1 + 1 file changed, 1 insertion(+) create mode 100644 testing.cpp diff --git a/testing.cpp b/testing.cpp new file mode 100644 index 0000000..60187dd --- /dev/null +++ b/testing.cpp @@ -0,0 +1 @@ +// testing file From 2419df575e176f36a3a8700eee93dd49ea5441f1 Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:10:08 -0700 Subject: [PATCH 16/28] Update testing.cpp --- testing.cpp | 249 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 248 insertions(+), 1 deletion(-) diff --git a/testing.cpp b/testing.cpp index 60187dd..af46bea 100644 --- a/testing.cpp +++ b/testing.cpp @@ -1 +1,248 @@ -// testing file +#include "common.h" + +#include +#include +#include +#include +#include + +#include + +class CSRMatrix { +public: + int nbrow, nbcol; + std::vector values; + std::vector col_indices; + std::vector row_indices; + + // 1) Full-matrix constructor (optional; you can keep or remove this) + CSRMatrix(const int& nrows=0, const int& ncols=0) + : nbrow(nrows), nbcol(ncols) + { + row_indices.resize(nbrow + 1); + for(int i=0; i 0) { + values.push_back(-1.0); + col_indices.push_back(i-1); + } + values.push_back(2.0); + col_indices.push_back(i); + if(i+1 < nbcol) { + values.push_back(-1.0); + col_indices.push_back(i+1); + } + } + row_indices[nbrow] = values.size(); + } + + // 2) Slice constructor: only build rows [row_start … row_start+nbrow-1] + CSRMatrix(const int& nrows, const int& ncols, const int& row_start) + : nbrow(nrows), nbcol(ncols) + { + row_indices.resize(nbrow + 1); + for(int i_local = 0; i_local < nbrow; ++i_local) { + int ig = row_start + i_local; // global row index + row_indices[i_local] = values.size(); + if (ig > 0) { + values.push_back(-1.0); + col_indices.push_back((ig - 1) - row_start); + } + values.push_back(2.0); + col_indices.push_back(ig - row_start); + if (ig + 1 < nbcol) { + values.push_back(-1.0); + col_indices.push_back((ig + 1) - row_start); + } + } + row_indices[nbrow] = values.size(); + } + + // Matrix–vector multiply (local rows only) + std::vector operator*(const std::vector& x) const { + assert(x.size() == (size_t)nbcol); + std::vector y(nbrow, 0.0); + for(int i=0; i& u, + const std::vector& v) { + assert(u.size() == v.size()); + // 1) local dot-product + double local = 0.0; + for (size_t i = 0; i < u.size(); ++i) { + local += u[i] * v[i]; + } + // 2) all-reduce to get global dot-product + double global = 0.0; + MPI_Allreduce( + &local, // sendbuf + &global, // recvbuf + 1, // count + MPI_DOUBLE, // datatype + MPI_SUM, // op + MPI_COMM_WORLD + ); + return global; +} + +// scalar product (u, v) +double operator,(const std::vector& u, const std::vector& v){ + assert(u.size() == v.size()); + double sp = 0.; + for(int j = 0; j < u.size(); j++) + sp += u[j] * v[j]; + return sp; +} + +// addition of two vectors u+v +std::vector operator+(const std::vector& u, const std::vector& v){ + assert(u.size() == v.size()); + std::vector w = u; + for(int j = 0; j < u.size(); j++) + w[j] += v[j]; + return w; +} + +// multiplication of a vector by a scalar a*u +std::vector operator*(const double& a, const std::vector& u){ + std::vector w(u.size()); + for(int j = 0; j < w.size(); j++) + w[j] = a * u[j]; + return w; +} + +// addition assignment operator, add v to u +void operator+=(std::vector& u, const std::vector& v){ + assert(u.size() == v.size()); + for(int j = 0; j < u.size(); j++) + u[j] += v[j]; +} + +/* block Jacobi preconditioner: perform forward and backward substitution + using the Cholesky factorization of the local diagonal block computed by Eigen */ +std::vector prec(const Eigen::SimplicialCholesky>& P, const std::vector& u){ + Eigen::VectorXd b(u.size()); + for (int i = 0; i < u.size(); i++) + b[i] = u[i]; + Eigen::VectorXd xe = P.solve(b); + std::vector x(u.size()); + for (int i = 0; i < u.size(); i++) + x[i] = xe[i]; + return x; +} + +CSRMatrix A; + + +/* N is the size of the matrix, and n is the number of rows assigned per rank. + * It is your responsibility to generate the input matrix, assuming the ranks are + * partitioned rowwise. + * The input matrix is L + I, where L is the Laplacian of a 1D Possion's equation, + * and I is the identity matrix. + * See the constructor of the Matrix structure as an example. + * The constructor of CG_Solver will not be included in the timing result. + * Note that the starter code only works for 1 rank and it is not efficient. + */ +CG_Solver::CG_Solver(const int& n, const int& N) { + // 1) Discover the MPI rank and the total number of ranks + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + // 2) Compute how many rows we own + int n_local = n; + + // 3) Which global rows are they + int row_start = rank * n_local; + int row_end = row_start + n_local -1; + + std::cout << "Rank" << rank << " owns rows " << row_start << "-" << row_end << "\n"; + + // 4) Pass those into the CSR builder so it only builds that slice + A = CSRMatrix(n_local, N, row_start); +} + +/* The preconditioned conjugate gradient method solving Ax = b with tolerance tol. + * This is the function being evalauted for performance. + * Note that the starter code only works for 1 rank and it is not efficient. + */ +void CG_Solver::solve(const std::vector& b, + std::vector& x, + double tol) { + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); // get total ranks + + int n_local = A.NbRow(); // how many rows this rank owns + + // p_global holds the full 'p' from all ranks (length n_local*size) + // Ap_local caches our local mat‐vec result (length n_local) + std::vector p_global(n_local * size); + std::vector Ap_local(n_local); + + std::vector> coefficients; + for (int row = 0; row < n_local; ++row) { + for (int idx = A.row_indices[row]; idx < A.row_indices[row+1]; ++idx) { + int col = A.col_indices[idx]; + double val = A.values[idx]; + coefficients.emplace_back(row, col, val); + } + } + Eigen::SparseMatrix B(n_local, n_local); + B.setFromTriplets(coefficients.begin(), coefficients.end()); + Eigen::SimplicialCholesky> P(B); + + // Compute norms and initialize PCG vectors + double norm_b = std::sqrt(global_dot(b, b)); + const double epsilon = tol * norm_b; + std::vector r = b, z = prec(P, b), p = z; + double rz = global_dot(r, z); + + // Main PCG loop + while (true) { + // 1) gather the full 'p' into p_global + MPI_Allgather( + p.data(), n_local, MPI_DOUBLE, + p_global.data(), n_local, MPI_DOUBLE, + MPI_COMM_WORLD + ); + + // 2) local mat‐vec: Ap_local = A * p_global + Ap_local = A * p_global; + + // 3) compute alpha = (r,z)/(p,Ap) + double pAp = global_dot(p, Ap_local); + double alpha = rz / pAp; + + // 4) update x and r locally + for (int i = 0; i < n_local; ++i) { + x[i] += alpha * p[i]; + r[i] -= alpha * Ap_local[i]; + } + + // 5) apply preconditioner + z = prec(P, r); + + // 6) compute new (r,z), check convergence + double rz_new = global_dot(r, z); + if (std::sqrt(rz_new) < epsilon) break; + + // 7) update p and rz for next iteration + double beta = rz_new / rz; + for (int i = 0; i < n_local; ++i) + p[i] = z[i] + beta * p[i]; + rz = rz_new; + } +} From 61af0b17f02f884089e16a0aba63f845c8af933e Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:13:04 -0700 Subject: [PATCH 17/28] Update testing.cpp --- testing.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/testing.cpp b/testing.cpp index af46bea..127ad4a 100644 --- a/testing.cpp +++ b/testing.cpp @@ -209,7 +209,7 @@ void CG_Solver::solve(const std::vector& b, const double epsilon = tol * norm_b; std::vector r = b, z = prec(P, b), p = z; double rz = global_dot(r, z); - + int num_it = 0; // Main PCG loop while (true) { // 1) gather the full 'p' into p_global @@ -237,6 +237,11 @@ void CG_Solver::solve(const std::vector& b, // 6) compute new (r,z), check convergence double rz_new = global_dot(r, z); + if (rank == 0) { + double res_norm = std::sqrt(rz_new); + std::cout << "iteration: " << num_it + << "\tresidual: " << res_norm << "\n"; + } if (std::sqrt(rz_new) < epsilon) break; // 7) update p and rz for next iteration From ec587e442f21235ab2e525ed91c0bb501e977d88 Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:17:38 -0700 Subject: [PATCH 18/28] Update testing.cpp --- testing.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/testing.cpp b/testing.cpp index 127ad4a..8115bea 100644 --- a/testing.cpp +++ b/testing.cpp @@ -237,12 +237,13 @@ void CG_Solver::solve(const std::vector& b, // 6) compute new (r,z), check convergence double rz_new = global_dot(r, z); - if (rank == 0) { - double res_norm = std::sqrt(rz_new); - std::cout << "iteration: " << num_it + num_it++; + double res_norm = std::sqrt(global_dot(r, r)); + if (rank == 0) { + std::cout << "iteration: " << num_it << "\tresidual: " << res_norm << "\n"; - } - if (std::sqrt(rz_new) < epsilon) break; + } + if (res_norm < epsilon) break; // 7) update p and rz for next iteration double beta = rz_new / rz; From 9e58e600bfad2d9c9903ad014936a36cee2f7837 Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:20:43 -0700 Subject: [PATCH 19/28] updated global_dot helper --- distributed_pcg.cpp | 133 +++++++++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 50 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index 9d95452..8115bea 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -73,6 +73,29 @@ class CSRMatrix { int NbRow() const { return nbrow; } int NbCol() const { return nbcol; } }; + + +/// Compute the inner product of two local-length vectors across all ranks +static double global_dot(const std::vector& u, + const std::vector& v) { + assert(u.size() == v.size()); + // 1) local dot-product + double local = 0.0; + for (size_t i = 0; i < u.size(); ++i) { + local += u[i] * v[i]; + } + // 2) all-reduce to get global dot-product + double global = 0.0; + MPI_Allreduce( + &local, // sendbuf + &global, // recvbuf + 1, // count + MPI_DOUBLE, // datatype + MPI_SUM, // op + MPI_COMM_WORLD + ); + return global; +} // scalar product (u, v) double operator,(const std::vector& u, const std::vector& v){ @@ -155,67 +178,77 @@ CG_Solver::CG_Solver(const int& n, const int& N) { * This is the function being evalauted for performance. * Note that the starter code only works for 1 rank and it is not efficient. */ -void CG_Solver::solve(const std::vector& b, std::vector& x, double tol) { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Get the rank of the process +void CG_Solver::solve(const std::vector& b, + std::vector& x, + double tol) { + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); // get total ranks + + int n_local = A.NbRow(); // how many rows this rank owns - int n_local = A.NbRow(); + // p_global holds the full 'p' from all ranks (length n_local*size) + // Ap_local caches our local mat‐vec result (length n_local) + std::vector p_global(n_local * size); + std::vector Ap_local(n_local); - // get the local diagonal block of A std::vector> coefficients; - for (int row = 0; row < A.NbRow(); ++row) { - for (int idx = A.row_indices[row]; idx < A.row_indices[row + 1]; ++idx) { - int col = A.col_indices[idx]; - double val = A.values[idx]; - coefficients.push_back(Eigen::Triplet(row, col, val)); + for (int row = 0; row < n_local; ++row) { + for (int idx = A.row_indices[row]; idx < A.row_indices[row+1]; ++idx) { + int col = A.col_indices[idx]; + double val = A.values[idx]; + coefficients.emplace_back(row, col, val); } } - - // ========================================== - // UNCOMMENT TO PRINT CHECK COEFFICIENTS - // ========================================== - - // std::cout << "\nTriplets from CSR matrix:\n"; - // for (const auto& t : coefficients) { - // std::cout << "(" << t.row() << ", " << t.col() << ") = " << t.value() << "\n"; - // } - - - // compute the Cholesky factorization of the diagonal block for the preconditioner Eigen::SparseMatrix B(n_local, n_local); B.setFromTriplets(coefficients.begin(), coefficients.end()); Eigen::SimplicialCholesky> P(B); - const double epsilon = tol * std::sqrt((b, b)); - x.assign(b.size(), 0.); + // Compute norms and initialize PCG vectors + double norm_b = std::sqrt(global_dot(b, b)); + const double epsilon = tol * norm_b; std::vector r = b, z = prec(P, b), p = z; - double alpha = 0., beta = 0.; - double res = std::sqrt((r, r)); - - int num_it = 0; - - // std::vector Ap = A * p; - // std::cout << "A * p = ["; - // for (size_t i = 0; i < Ap.size(); ++i) { - // std::cout << Ap[i]; - // if (i < Ap.size() - 1) std::cout << ", "; - // } - // std::cout << "]\n"; + double rz = global_dot(r, z); + int num_it = 0; + // Main PCG loop + while (true) { + // 1) gather the full 'p' into p_global + MPI_Allgather( + p.data(), n_local, MPI_DOUBLE, + p_global.data(), n_local, MPI_DOUBLE, + MPI_COMM_WORLD + ); + + // 2) local mat‐vec: Ap_local = A * p_global + Ap_local = A * p_global; + + // 3) compute alpha = (r,z)/(p,Ap) + double pAp = global_dot(p, Ap_local); + double alpha = rz / pAp; + + // 4) update x and r locally + for (int i = 0; i < n_local; ++i) { + x[i] += alpha * p[i]; + r[i] -= alpha * Ap_local[i]; + } - - while(res >= epsilon) { - alpha = (r, z) / (p, A * p); - x += (+alpha) * p; - r += (-alpha) * (A * p); + // 5) apply preconditioner z = prec(P, r); - beta = (r, z) / (alpha * (p, A * p)); - p = z + beta * p; - res = std::sqrt((r, r)); - - num_it++; - if (rank == 0 && !(num_it % 1)) { - std::cout << "iteration: " << num_it << "\t"; - std::cout << "residual: " << res << "\n"; + + // 6) compute new (r,z), check convergence + double rz_new = global_dot(r, z); + num_it++; + double res_norm = std::sqrt(global_dot(r, r)); + if (rank == 0) { + std::cout << "iteration: " << num_it + << "\tresidual: " << res_norm << "\n"; } + if (res_norm < epsilon) break; + + // 7) update p and rz for next iteration + double beta = rz_new / rz; + for (int i = 0; i < n_local; ++i) + p[i] = z[i] + beta * p[i]; + rz = rz_new; } - } +} From 5f2b69bdeed5865c1a454af1129829397ea95769 Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:28:24 -0700 Subject: [PATCH 20/28] Update testing.cpp --- testing.cpp | 58 +++++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/testing.cpp b/testing.cpp index 8115bea..25e1ba6 100644 --- a/testing.cpp +++ b/testing.cpp @@ -183,69 +183,71 @@ void CG_Solver::solve(const std::vector& b, double tol) { int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); // get total ranks + MPI_Comm_size(MPI_COMM_WORLD, &size); - int n_local = A.NbRow(); // how many rows this rank owns + int n_local = A.NbRow(); // rows per rank - // p_global holds the full 'p' from all ranks (length n_local*size) - // Ap_local caches our local mat‐vec result (length n_local) + // p_global: concatenation of all ranks' p (length = n_local * size) + // Ap_local: to cache our local y = A * p_global (length = n_local) std::vector p_global(n_local * size); std::vector Ap_local(n_local); + // Build the local diagonal block for the block‐Jacobi preconditioner std::vector> coefficients; - for (int row = 0; row < n_local; ++row) { - for (int idx = A.row_indices[row]; idx < A.row_indices[row+1]; ++idx) { - int col = A.col_indices[idx]; - double val = A.values[idx]; - coefficients.emplace_back(row, col, val); + for (int i = 0; i < n_local; ++i) { + for (int k = A.row_indices[i]; k < A.row_indices[i+1]; ++k) { + coefficients.emplace_back(i, A.col_indices[k], A.values[k]); } } Eigen::SparseMatrix B(n_local, n_local); B.setFromTriplets(coefficients.begin(), coefficients.end()); Eigen::SimplicialCholesky> P(B); - // Compute norms and initialize PCG vectors - double norm_b = std::sqrt(global_dot(b, b)); - const double epsilon = tol * norm_b; - std::vector r = b, z = prec(P, b), p = z; + // Initial norms and vectors + double norm_b = std::sqrt(global_dot(b, b)); + const double eps = tol * norm_b; + std::vector r = b; + std::vector z = prec(P, b); + std::vector p = z; double rz = global_dot(r, z); - int num_it = 0; - // Main PCG loop + + int num_it = 0; + // Main PCG loop while (true) { - // 1) gather the full 'p' into p_global + // 1) Gather the full search direction p into p_global MPI_Allgather( - p.data(), n_local, MPI_DOUBLE, - p_global.data(), n_local, MPI_DOUBLE, + p.data(), n_local, MPI_DOUBLE, + p_global.data(),n_local, MPI_DOUBLE, MPI_COMM_WORLD ); - // 2) local mat‐vec: Ap_local = A * p_global + // 2) Local mat–vec using CSR: Ap_local = A * p_global Ap_local = A * p_global; - // 3) compute alpha = (r,z)/(p,Ap) + // 3) Compute alpha = (r,z) / (p,Ap) double pAp = global_dot(p, Ap_local); double alpha = rz / pAp; - // 4) update x and r locally + // 4) Update x and r (local slices) for (int i = 0; i < n_local; ++i) { x[i] += alpha * p[i]; r[i] -= alpha * Ap_local[i]; } - // 5) apply preconditioner + // 5) Apply preconditioner: z = P^{-1} r z = prec(P, r); - // 6) compute new (r,z), check convergence + // 6) Compute new (r,z), print, and check convergence double rz_new = global_dot(r, z); - num_it++; - double res_norm = std::sqrt(global_dot(r, r)); + num_it++; if (rank == 0) { - std::cout << "iteration: " << num_it + double res_norm = std::sqrt(global_dot(r, r)); + std::cout << "iteration: " << num_it << "\tresidual: " << res_norm << "\n"; } - if (res_norm < epsilon) break; + if (std::sqrt(rz_new) < eps) break; - // 7) update p and rz for next iteration + // 7) Update search direction p and rz for next iter double beta = rz_new / rz; for (int i = 0; i < n_local; ++i) p[i] = z[i] + beta * p[i]; From 4cb4825969b4d06d2a0210a4066e6aaa1a9c75bc Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:31:25 -0700 Subject: [PATCH 21/28] Update mpi_allgather --- distributed_pcg.cpp | 58 +++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index 8115bea..25e1ba6 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -183,69 +183,71 @@ void CG_Solver::solve(const std::vector& b, double tol) { int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); // get total ranks + MPI_Comm_size(MPI_COMM_WORLD, &size); - int n_local = A.NbRow(); // how many rows this rank owns + int n_local = A.NbRow(); // rows per rank - // p_global holds the full 'p' from all ranks (length n_local*size) - // Ap_local caches our local mat‐vec result (length n_local) + // p_global: concatenation of all ranks' p (length = n_local * size) + // Ap_local: to cache our local y = A * p_global (length = n_local) std::vector p_global(n_local * size); std::vector Ap_local(n_local); + // Build the local diagonal block for the block‐Jacobi preconditioner std::vector> coefficients; - for (int row = 0; row < n_local; ++row) { - for (int idx = A.row_indices[row]; idx < A.row_indices[row+1]; ++idx) { - int col = A.col_indices[idx]; - double val = A.values[idx]; - coefficients.emplace_back(row, col, val); + for (int i = 0; i < n_local; ++i) { + for (int k = A.row_indices[i]; k < A.row_indices[i+1]; ++k) { + coefficients.emplace_back(i, A.col_indices[k], A.values[k]); } } Eigen::SparseMatrix B(n_local, n_local); B.setFromTriplets(coefficients.begin(), coefficients.end()); Eigen::SimplicialCholesky> P(B); - // Compute norms and initialize PCG vectors - double norm_b = std::sqrt(global_dot(b, b)); - const double epsilon = tol * norm_b; - std::vector r = b, z = prec(P, b), p = z; + // Initial norms and vectors + double norm_b = std::sqrt(global_dot(b, b)); + const double eps = tol * norm_b; + std::vector r = b; + std::vector z = prec(P, b); + std::vector p = z; double rz = global_dot(r, z); - int num_it = 0; - // Main PCG loop + + int num_it = 0; + // Main PCG loop while (true) { - // 1) gather the full 'p' into p_global + // 1) Gather the full search direction p into p_global MPI_Allgather( - p.data(), n_local, MPI_DOUBLE, - p_global.data(), n_local, MPI_DOUBLE, + p.data(), n_local, MPI_DOUBLE, + p_global.data(),n_local, MPI_DOUBLE, MPI_COMM_WORLD ); - // 2) local mat‐vec: Ap_local = A * p_global + // 2) Local mat–vec using CSR: Ap_local = A * p_global Ap_local = A * p_global; - // 3) compute alpha = (r,z)/(p,Ap) + // 3) Compute alpha = (r,z) / (p,Ap) double pAp = global_dot(p, Ap_local); double alpha = rz / pAp; - // 4) update x and r locally + // 4) Update x and r (local slices) for (int i = 0; i < n_local; ++i) { x[i] += alpha * p[i]; r[i] -= alpha * Ap_local[i]; } - // 5) apply preconditioner + // 5) Apply preconditioner: z = P^{-1} r z = prec(P, r); - // 6) compute new (r,z), check convergence + // 6) Compute new (r,z), print, and check convergence double rz_new = global_dot(r, z); - num_it++; - double res_norm = std::sqrt(global_dot(r, r)); + num_it++; if (rank == 0) { - std::cout << "iteration: " << num_it + double res_norm = std::sqrt(global_dot(r, r)); + std::cout << "iteration: " << num_it << "\tresidual: " << res_norm << "\n"; } - if (res_norm < epsilon) break; + if (std::sqrt(rz_new) < eps) break; - // 7) update p and rz for next iteration + // 7) Update search direction p and rz for next iter double beta = rz_new / rz; for (int i = 0; i < n_local; ++i) p[i] = z[i] + beta * p[i]; From 37735ed605bb20b037c9554370b741b979d4a347 Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:35:28 -0700 Subject: [PATCH 22/28] Update testing.cpp --- testing.cpp | 90 ++++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/testing.cpp b/testing.cpp index 25e1ba6..60ca510 100644 --- a/testing.cpp +++ b/testing.cpp @@ -204,53 +204,51 @@ void CG_Solver::solve(const std::vector& b, Eigen::SimplicialCholesky> P(B); // Initial norms and vectors - double norm_b = std::sqrt(global_dot(b, b)); - const double eps = tol * norm_b; - std::vector r = b; - std::vector z = prec(P, b); - std::vector p = z; - double rz = global_dot(r, z); - - int num_it = 0; - // Main PCG loop - while (true) { - // 1) Gather the full search direction p into p_global - MPI_Allgather( - p.data(), n_local, MPI_DOUBLE, - p_global.data(),n_local, MPI_DOUBLE, - MPI_COMM_WORLD - ); - - // 2) Local mat–vec using CSR: Ap_local = A * p_global - Ap_local = A * p_global; - - // 3) Compute alpha = (r,z) / (p,Ap) - double pAp = global_dot(p, Ap_local); - double alpha = rz / pAp; - - // 4) Update x and r (local slices) - for (int i = 0; i < n_local; ++i) { - x[i] += alpha * p[i]; - r[i] -= alpha * Ap_local[i]; - } - - // 5) Apply preconditioner: z = P^{-1} r - z = prec(P, r); + double norm_b = std::sqrt(global_dot(b, b)); +const double eps = tol * norm_b; +std::vector r = b, z = prec(P, b), p = z; +double rz = global_dot(r, z); +int num_it = 0; + +// Main PCG loop +while (true) { + // 1) Gather global search direction + MPI_Allgather( + p.data(), n_local, MPI_DOUBLE, + p_global.data(), n_local, MPI_DOUBLE, + MPI_COMM_WORLD + ); + + // 2) Local mat–vec: Ap_local = A * p_global + Ap_local = A * p_global; + + // 3) Compute alpha = (r,z) / (p,Ap) + double pAp = global_dot(p, Ap_local); + double alpha = rz / pAp; + + // 4) Update x and r locally + for (int i = 0; i < n_local; ++i) { + x[i] += alpha * p[i]; + r[i] -= alpha * Ap_local[i]; + } - // 6) Compute new (r,z), print, and check convergence - double rz_new = global_dot(r, z); - num_it++; - if (rank == 0) { - double res_norm = std::sqrt(global_dot(r, r)); - std::cout << "iteration: " << num_it - << "\tresidual: " << res_norm << "\n"; - } - if (std::sqrt(rz_new) < eps) break; + // 5) Apply preconditioner locally + z = prec(P, r); - // 7) Update search direction p and rz for next iter - double beta = rz_new / rz; - for (int i = 0; i < n_local; ++i) - p[i] = z[i] + beta * p[i]; - rz = rz_new; + // 6) Compute new (r,z), print, and test convergence + double rz_new = global_dot(r, z); + num_it++; + if (rank == 0) { + double res_norm = std::sqrt(global_dot(r, r)); + std::cout << "iteration: " << num_it + << "\tresidual: " << res_norm << "\n"; } + if (std::sqrt(rz_new) < eps) + break; + + // 7) Update search direction and rz for next iteration + double beta = rz_new / rz; + for (int i = 0; i < n_local; ++i) + p[i] = z[i] + beta * p[i]; + rz = rz_new; } From 480d858ac12bd7f7d1d486cd5ad68bd3755c5891 Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:38:46 -0700 Subject: [PATCH 23/28] Update testing.cpp --- testing.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/testing.cpp b/testing.cpp index 60ca510..9ea9f03 100644 --- a/testing.cpp +++ b/testing.cpp @@ -252,3 +252,4 @@ while (true) { p[i] = z[i] + beta * p[i]; rz = rz_new; } +} From 7fa80e8fa28487c71363e627c5ad5de06ff0bdf3 Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:51:57 -0700 Subject: [PATCH 24/28] Update testing.cpp --- testing.cpp | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/testing.cpp b/testing.cpp index 9ea9f03..37692fc 100644 --- a/testing.cpp +++ b/testing.cpp @@ -40,24 +40,30 @@ class CSRMatrix { CSRMatrix(const int& nrows, const int& ncols, const int& row_start) : nbrow(nrows), nbcol(ncols) { - row_indices.resize(nbrow + 1); - for(int i_local = 0; i_local < nbrow; ++i_local) { - int ig = row_start + i_local; // global row index - row_indices[i_local] = values.size(); - if (ig > 0) { - values.push_back(-1.0); - col_indices.push_back((ig - 1) - row_start); - } - values.push_back(2.0); - col_indices.push_back(ig - row_start); - if (ig + 1 < nbcol) { - values.push_back(-1.0); - col_indices.push_back((ig + 1) - row_start); - } + row_indices.resize(nbrow + 1); + for(int i_local = 0; i_local < nbrow; ++i_local) { + int ig = row_start + i_local; // global row + + row_indices[i_local] = values.size(); + + // sub-diagonal *inside* local block? + if (i_local > 0) { + values.push_back(-1.0); + col_indices.push_back(ig - 1); // global col index } - row_indices[nbrow] = values.size(); + + // diagonal + values.push_back(2.0); + col_indices.push_back(ig); + + // super-diagonal *inside* local block? + if (i_local + 1 < nbrow) { + values.push_back(-1.0); + col_indices.push_back(ig + 1); + } + } + row_indices[nbrow] = values.size(); } - // Matrix–vector multiply (local rows only) std::vector operator*(const std::vector& x) const { assert(x.size() == (size_t)nbcol); From 01851ff91f1259d65afe7a0034f058437cb5270e Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:55:19 -0700 Subject: [PATCH 25/28] Update testing.cpp --- testing.cpp | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/testing.cpp b/testing.cpp index 37692fc..17f939a 100644 --- a/testing.cpp +++ b/testing.cpp @@ -40,29 +40,22 @@ class CSRMatrix { CSRMatrix(const int& nrows, const int& ncols, const int& row_start) : nbrow(nrows), nbcol(ncols) { - row_indices.resize(nbrow + 1); - for(int i_local = 0; i_local < nbrow; ++i_local) { - int ig = row_start + i_local; // global row - - row_indices[i_local] = values.size(); - - // sub-diagonal *inside* local block? - if (i_local > 0) { - values.push_back(-1.0); - col_indices.push_back(ig - 1); // global col index - } - - // diagonal - values.push_back(2.0); - col_indices.push_back(ig); - - // super-diagonal *inside* local block? - if (i_local + 1 < nbrow) { - values.push_back(-1.0); - col_indices.push_back(ig + 1); + row_indices.resize(nbrow + 1); + for(int i_local = 0; i_local < nbrow; ++i_local) { + int ig = row_start + i_local; // global row index + row_indices[i_local] = values.size(); + if (ig > 0) { + values.push_back(-1.0); + col_indices.push_back((ig - 1) - row_start); + } + values.push_back(2.0); + col_indices.push_back(ig - row_start); + if (ig + 1 < nbcol) { + values.push_back(-1.0); + col_indices.push_back((ig + 1) - row_start); + } } - } - row_indices[nbrow] = values.size(); + row_indices[nbrow] = values.size(); } // Matrix–vector multiply (local rows only) std::vector operator*(const std::vector& x) const { From fe37fff7885f56c3f364db2598079029ab320ceb Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:55:40 -0700 Subject: [PATCH 26/28] Update distributed_pcg.cpp --- distributed_pcg.cpp | 92 ++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 47 deletions(-) diff --git a/distributed_pcg.cpp b/distributed_pcg.cpp index 25e1ba6..17f939a 100644 --- a/distributed_pcg.cpp +++ b/distributed_pcg.cpp @@ -57,7 +57,6 @@ class CSRMatrix { } row_indices[nbrow] = values.size(); } - // Matrix–vector multiply (local rows only) std::vector operator*(const std::vector& x) const { assert(x.size() == (size_t)nbcol); @@ -204,53 +203,52 @@ void CG_Solver::solve(const std::vector& b, Eigen::SimplicialCholesky> P(B); // Initial norms and vectors - double norm_b = std::sqrt(global_dot(b, b)); - const double eps = tol * norm_b; - std::vector r = b; - std::vector z = prec(P, b); - std::vector p = z; - double rz = global_dot(r, z); - - int num_it = 0; - // Main PCG loop - while (true) { - // 1) Gather the full search direction p into p_global - MPI_Allgather( - p.data(), n_local, MPI_DOUBLE, - p_global.data(),n_local, MPI_DOUBLE, - MPI_COMM_WORLD - ); - - // 2) Local mat–vec using CSR: Ap_local = A * p_global - Ap_local = A * p_global; - - // 3) Compute alpha = (r,z) / (p,Ap) - double pAp = global_dot(p, Ap_local); - double alpha = rz / pAp; - - // 4) Update x and r (local slices) - for (int i = 0; i < n_local; ++i) { - x[i] += alpha * p[i]; - r[i] -= alpha * Ap_local[i]; - } - - // 5) Apply preconditioner: z = P^{-1} r - z = prec(P, r); + double norm_b = std::sqrt(global_dot(b, b)); +const double eps = tol * norm_b; +std::vector r = b, z = prec(P, b), p = z; +double rz = global_dot(r, z); +int num_it = 0; + +// Main PCG loop +while (true) { + // 1) Gather global search direction + MPI_Allgather( + p.data(), n_local, MPI_DOUBLE, + p_global.data(), n_local, MPI_DOUBLE, + MPI_COMM_WORLD + ); + + // 2) Local mat–vec: Ap_local = A * p_global + Ap_local = A * p_global; + + // 3) Compute alpha = (r,z) / (p,Ap) + double pAp = global_dot(p, Ap_local); + double alpha = rz / pAp; + + // 4) Update x and r locally + for (int i = 0; i < n_local; ++i) { + x[i] += alpha * p[i]; + r[i] -= alpha * Ap_local[i]; + } - // 6) Compute new (r,z), print, and check convergence - double rz_new = global_dot(r, z); - num_it++; - if (rank == 0) { - double res_norm = std::sqrt(global_dot(r, r)); - std::cout << "iteration: " << num_it - << "\tresidual: " << res_norm << "\n"; - } - if (std::sqrt(rz_new) < eps) break; + // 5) Apply preconditioner locally + z = prec(P, r); - // 7) Update search direction p and rz for next iter - double beta = rz_new / rz; - for (int i = 0; i < n_local; ++i) - p[i] = z[i] + beta * p[i]; - rz = rz_new; + // 6) Compute new (r,z), print, and test convergence + double rz_new = global_dot(r, z); + num_it++; + if (rank == 0) { + double res_norm = std::sqrt(global_dot(r, r)); + std::cout << "iteration: " << num_it + << "\tresidual: " << res_norm << "\n"; } + if (std::sqrt(rz_new) < eps) + break; + + // 7) Update search direction and rz for next iteration + double beta = rz_new / rz; + for (int i = 0; i < n_local; ++i) + p[i] = z[i] + beta * p[i]; + rz = rz_new; +} } From 204ca3344e442d84607a8430f904d063fe179147 Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 14:59:09 -0700 Subject: [PATCH 27/28] Update testing.cpp --- testing.cpp | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/testing.cpp b/testing.cpp index 17f939a..d42d9f6 100644 --- a/testing.cpp +++ b/testing.cpp @@ -40,22 +40,29 @@ class CSRMatrix { CSRMatrix(const int& nrows, const int& ncols, const int& row_start) : nbrow(nrows), nbcol(ncols) { - row_indices.resize(nbrow + 1); - for(int i_local = 0; i_local < nbrow; ++i_local) { - int ig = row_start + i_local; // global row index - row_indices[i_local] = values.size(); - if (ig > 0) { - values.push_back(-1.0); - col_indices.push_back((ig - 1) - row_start); - } - values.push_back(2.0); - col_indices.push_back(ig - row_start); - if (ig + 1 < nbcol) { - values.push_back(-1.0); - col_indices.push_back((ig + 1) - row_start); - } + row_indices.resize(nbrow + 1); + for(int i_local = 0; i_local < nbrow; ++i_local) { + int ig = row_start + i_local; // global row + + row_indices[i_local] = values.size(); + + // sub-diagonal *inside* local block? + if (i_local > 0) { + values.push_back(-1.0); + col_indices.push_back(ig - 1); // global col index } - row_indices[nbrow] = values.size(); + + // diagonal + values.push_back(2.0); + col_indices.push_back(ig); + + // super-diagonal *inside* local block? + if (i_local + 1 < nbrow) { + values.push_back(-1.0); + col_indices.push_back(ig + 1); + } + } + row_indices[nbrow] = values.size(); } // Matrix–vector multiply (local rows only) std::vector operator*(const std::vector& x) const { @@ -185,6 +192,7 @@ void CG_Solver::solve(const std::vector& b, MPI_Comm_size(MPI_COMM_WORLD, &size); int n_local = A.NbRow(); // rows per rank + int row_start = rank * n_local; // p_global: concatenation of all ranks' p (length = n_local * size) // Ap_local: to cache our local y = A * p_global (length = n_local) @@ -193,9 +201,14 @@ void CG_Solver::solve(const std::vector& b, // Build the local diagonal block for the block‐Jacobi preconditioner std::vector> coefficients; - for (int i = 0; i < n_local; ++i) { - for (int k = A.row_indices[i]; k < A.row_indices[i+1]; ++k) { - coefficients.emplace_back(i, A.col_indices[k], A.values[k]); + coefficients.reserve(3 * n_local); + for (int i_local = 0; i_local < n_local; ++i_local) { + // scan your CSR row i_local + for (int idx = A.row_indices[i_local]; idx < A.row_indices[i_local+1]; ++idx) { + int global_col = A.col_indices[idx]; // e.g. ig-1, ig, or ig+1 + int local_col = global_col - row_start; // now in [0..n_local-1] + double val = A.values[idx]; + coefficients.emplace_back(i_local, local_col, val); } } Eigen::SparseMatrix B(n_local, n_local); From fbd512b173d699ab21231da5b5a35facbcf1612c Mon Sep 17 00:00:00 2001 From: Brandon Ton Date: Sun, 4 May 2025 15:15:45 -0700 Subject: [PATCH 28/28] Update testing.cpp --- testing.cpp | 38 ++++++++------------------------------ 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/testing.cpp b/testing.cpp index d42d9f6..c8c3ba1 100644 --- a/testing.cpp +++ b/testing.cpp @@ -16,55 +16,33 @@ class CSRMatrix { std::vector row_indices; // 1) Full-matrix constructor (optional; you can keep or remove this) - CSRMatrix(const int& nrows=0, const int& ncols=0) - : nbrow(nrows), nbcol(ncols) - { - row_indices.resize(nbrow + 1); - for(int i=0; i 0) { - values.push_back(-1.0); - col_indices.push_back(i-1); - } - values.push_back(2.0); - col_indices.push_back(i); - if(i+1 < nbcol) { - values.push_back(-1.0); - col_indices.push_back(i+1); - } - } - row_indices[nbrow] = values.size(); - } - - // 2) Slice constructor: only build rows [row_start … row_start+nbrow-1] CSRMatrix(const int& nrows, const int& ncols, const int& row_start) : nbrow(nrows), nbcol(ncols) { row_indices.resize(nbrow + 1); - for(int i_local = 0; i_local < nbrow; ++i_local) { - int ig = row_start + i_local; // global row - + for (int i_local = 0; i_local < nbrow; ++i_local) { + int ig = row_start + i_local; // global row index row_indices[i_local] = values.size(); - // sub-diagonal *inside* local block? - if (i_local > 0) { + // global sub‐diagonal? + if (ig > 0) { values.push_back(-1.0); - col_indices.push_back(ig - 1); // global col index + col_indices.push_back(ig - 1); } // diagonal values.push_back(2.0); col_indices.push_back(ig); - // super-diagonal *inside* local block? - if (i_local + 1 < nbrow) { + // global super‐diagonal? + if (ig + 1 < nbcol) { values.push_back(-1.0); col_indices.push_back(ig + 1); } } row_indices[nbrow] = values.size(); } - // Matrix–vector multiply (local rows only) + // Matrix–vector multiply (local rows only) std::vector operator*(const std::vector& x) const { assert(x.size() == (size_t)nbcol); std::vector y(nbrow, 0.0);