Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"files.associations": {
"ostream": "cpp"
}
}
235 changes: 169 additions & 66 deletions distributed_pcg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,38 +8,93 @@

#include <Eigen/Sparse>

class Matrix{
public:
typedef std::pair<int, int> N2;

std::map<N2, double> data;
int nbrow;
int nbcol;

Matrix(const int& nr = 0, const int& nc = 0): nbrow(nr), nbcol(nc) {
for (int i = 0; i < nc; ++i) {
data[std::make_pair(i, i)] = 3.0;
if (i - 1 >= 0) data[std::make_pair(i, i - 1)] = -1.0;
if (i + 1 < nc) data[std::make_pair(i, i + 1)] = -1.0;
}
};

int NbRow() const {return nbrow;}
int NbCol() const {return nbcol;}

// matrix-vector product with vector xi
std::vector<double> operator*(const std::vector<double>& xi) const {
std::vector<double> b(NbRow(), 0.);
for(auto it = data.begin(); it != data.end(); ++it){
int j = (it->first).first;
int k = (it->first).second;
double Mjk = it->second;
b[j] += Mjk * xi[k];
}

return b;
class CSRMatrix {
public:
int nbrow, nbcol;
std::vector<double> values;
std::vector<int> col_indices;
std::vector<int> row_indices;

// 1) Full-matrix constructor (optional; you can keep or remove this)
CSRMatrix(const int& nrows=0, const int& ncols=0)
: nbrow(nrows), nbcol(ncols)
{
row_indices.resize(nbrow + 1);
for(int i=0; i<nbrow; ++i) {
row_indices[i] = values.size();
if(i > 0) {
values.push_back(-1.0);
col_indices.push_back(i-1);
}
values.push_back(2.0);
col_indices.push_back(i);
if(i+1 < nbcol) {
values.push_back(-1.0);
col_indices.push_back(i+1);
}
}
row_indices[nbrow] = values.size();
}

// 2) Slice constructor: only build rows [row_start … row_start+nbrow-1]
CSRMatrix(const int& nrows, const int& ncols, const int& row_start)
: nbrow(nrows), nbcol(ncols)
{
row_indices.resize(nbrow + 1);
for(int i_local = 0; i_local < nbrow; ++i_local) {
int ig = row_start + i_local; // global row index
row_indices[i_local] = values.size();
if (ig > 0) {
values.push_back(-1.0);
col_indices.push_back((ig - 1) - row_start);
}
values.push_back(2.0);
col_indices.push_back(ig - row_start);
if (ig + 1 < nbcol) {
values.push_back(-1.0);
col_indices.push_back((ig + 1) - row_start);
}
}
row_indices[nbrow] = values.size();
}
// Matrix–vector multiply (local rows only)
std::vector<double> operator*(const std::vector<double>& x) const {
assert(x.size() == (size_t)nbcol);
std::vector<double> y(nbrow, 0.0);
for(int i=0; i<nbrow; ++i) {
for(int k = row_indices[i]; k < row_indices[i+1]; ++k) {
y[i] += values[k] * x[col_indices[k]];
}
}
return y;
}

int NbRow() const { return nbrow; }
int NbCol() const { return nbcol; }
};


/// Compute the inner product of two local-length vectors across all ranks
static double global_dot(const std::vector<double>& u,
const std::vector<double>& v) {
assert(u.size() == v.size());
// 1) local dot-product
double local = 0.0;
for (size_t i = 0; i < u.size(); ++i) {
local += u[i] * v[i];
}
// 2) all-reduce to get global dot-product
double global = 0.0;
MPI_Allreduce(
&local, // sendbuf
&global, // recvbuf
1, // count
MPI_DOUBLE, // datatype
MPI_SUM, // op
MPI_COMM_WORLD
);
return global;
}

// scalar product (u, v)
double operator,(const std::vector<double>& u, const std::vector<double>& v){
Expand Down Expand Up @@ -87,7 +142,8 @@ std::vector<double> prec(const Eigen::SimplicialCholesky<Eigen::SparseMatrix<dou
return x;
}

Matrix A;
CSRMatrix A;


/* N is the size of the matrix, and n is the number of rows assigned per rank.
* It is your responsibility to generate the input matrix, assuming the ranks are
Expand All @@ -99,53 +155,100 @@ Matrix A;
* Note that the starter code only works for 1 rank and it is not efficient.
*/
CG_Solver::CG_Solver(const int& n, const int& N) {
A = Matrix(n, N);
// 1) Discover the MPI rank and the total number of ranks
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);

// 2) Compute how many rows we own
int n_local = n;

// 3) Which global rows are they
int row_start = rank * n_local;
int row_end = row_start + n_local -1;

std::cout << "Rank" << rank << " owns rows " << row_start << "-" << row_end << "\n";

// 4) Pass those into the CSR builder so it only builds that slice
A = CSRMatrix(n_local, N, row_start);
}

/* The preconditioned conjugate gradient method solving Ax = b with tolerance tol.
* This is the function being evalauted for performance.
* Note that the starter code only works for 1 rank and it is not efficient.
*/
void CG_Solver::solve(const std::vector<double>& b, std::vector<double>& x, double tol) {
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Get the rank of the process
void CG_Solver::solve(const std::vector<double>& b,
std::vector<double>& x,
double tol) {
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);

int n = A.NbCol();
int n_local = A.NbRow(); // rows per rank

// get the local diagonal block of A
// p_global: concatenation of all ranks' p (length = n_local * size)
// Ap_local: to cache our local y = A * p_global (length = n_local)
std::vector<double> p_global(n_local * size);
std::vector<double> Ap_local(n_local);

// Build the local diagonal block for the block‐Jacobi preconditioner
std::vector<Eigen::Triplet<double>> coefficients;
for(auto it = A.data.begin(); it != A.data.end(); ++it){
int j = (it->first).first;
int k = (it->first).second;
coefficients.push_back(Eigen::Triplet<double>(j, k, it -> second));
for (int i = 0; i < n_local; ++i) {
for (int k = A.row_indices[i]; k < A.row_indices[i+1]; ++k) {
coefficients.emplace_back(i, A.col_indices[k], A.values[k]);
}
}

// compute the Cholesky factorization of the diagonal block for the preconditioner
Eigen::SparseMatrix<double> B(n, n);
Eigen::SparseMatrix<double> B(n_local, n_local);
B.setFromTriplets(coefficients.begin(), coefficients.end());
Eigen::SimplicialCholesky<Eigen::SparseMatrix<double>> P(B);

const double epsilon = tol * std::sqrt((b, b));
x.assign(b.size(), 0.);
std::vector<double> r = b, z = prec(P, b), p = z;
double alpha = 0., beta = 0.;
double res = std::sqrt((r, r));
// Initial norms and vectors
double norm_b = std::sqrt(global_dot(b, b));
const double eps = tol * norm_b;
std::vector<double> r = b, z = prec(P, b), p = z;
double rz = global_dot(r, z);
int num_it = 0;

// Main PCG loop
while (true) {
// 1) Gather global search direction
MPI_Allgather(
p.data(), n_local, MPI_DOUBLE,
p_global.data(), n_local, MPI_DOUBLE,
MPI_COMM_WORLD
);

// 2) Local mat–vec: Ap_local = A * p_global
Ap_local = A * p_global;

// 3) Compute alpha = (r,z) / (p,Ap)
double pAp = global_dot(p, Ap_local);
double alpha = rz / pAp;

// 4) Update x and r locally
for (int i = 0; i < n_local; ++i) {
x[i] += alpha * p[i];
r[i] -= alpha * Ap_local[i];
}

int num_it = 0;

while(res >= epsilon) {
alpha = (r, z) / (p, A * p);
x += (+alpha) * p;
r += (-alpha) * (A * p);
z = prec(P, r);
beta = (r, z) / (alpha * (p, A * p));
p = z + beta * p;
res = std::sqrt((r, r));

num_it++;
if (rank == 0 && !(num_it % 1)) {
std::cout << "iteration: " << num_it << "\t";
std::cout << "residual: " << res << "\n";
}
// 5) Apply preconditioner locally
z = prec(P, r);

// 6) Compute new (r,z), print, and test convergence
double rz_new = global_dot(r, z);
num_it++;
if (rank == 0) {
double res_norm = std::sqrt(global_dot(r, r));
std::cout << "iteration: " << num_it
<< "\tresidual: " << res_norm << "\n";
}
}
if (std::sqrt(rz_new) < eps)
break;

// 7) Update search direction and rz for next iteration
double beta = rz_new / rz;
for (int i = 0; i < n_local; ++i)
p[i] = z[i] + beta * p[i];
rz = rz_new;
}
}
Loading