CG_Parallel.cpp

//
// Created by shernand on 29/05/19.
//
/**
 * @file CG_Parallel.cpp
 * @author Sergio Hernandez
 * This file is part of the Conjugate Gradient Project
 *
 * This class implements the parallel (MPI) version of the Conjugate Gradient solver.
 *
 */
#include <ctime>
#include <mpi.h>
#include <cmath>
#include <iostream>
#include "CG_Parallel.hpp"
/**
 * Complete constructor of the CG solver.
 * @param matrixA std::vector<double> NxN symmetric positive definite matrix, stored row-major
 * @param vectorB std::vector<double> Nx1 right-hand side vector
 * @param tol double tolerance for the stopping criterion
 * @param maxIter int maximum number of iterations allowed
 */
CG_Parallel::CG_Parallel(std::vector<double> const &matrixA, std::vector<double> const &vectorB, double const &tol,
                         int const &maxIter) {
    setMatrixA(matrixA);
    setVectorB(vectorB);
    setTol(tol);
    setMaxIterations(maxIter);
    // Local placeholder for timing measurements; it is not stored and has no effect.
    std::vector<double> timings(3, 0.0);
}
/**
 * Runs the CG iteration.
 * @param x_0 Nx1 std::vector<double> initial guess for the solution
 * @return std::tuple<std::vector<double>, int> the Nx1 solution vector and the number of iterations
 * it took to converge.
 */
std::tuple<std::vector<double>,int> CG_Parallel::computeCG(std::vector<double> &x_0) {
    std::vector<double> b = getVectorB();
    std::vector<double> mA = getMatrixA();
    int max_iter = getMaxIterations();
    double tol = getTol();
    std::vector<double> x_k = x_0;
    // Initial residual r_0 = b - A*x_0, which is also the first search direction d_0.
    std::vector<double> r_k = matrixVector(mA, x_k);
    r_k = vectorScalarMul(r_k, -1.0);
    r_k = vectorVectorSum(b, r_k);
    std::vector<double> d_k = r_k;
    // norm_res holds the squared residual norm r_k^T r_k, which is compared against tol.
    double norm_res = vectorVectorDot(r_k, r_k);
    int k = 0;
    while ((norm_res > tol) && (k < max_iter)) {
        std::vector<double> A_dk = matrixVector(mA, d_k);
        double dk_A_dk = vectorVectorDot(d_k, A_dk);
        double alpha_k = norm_res / dk_A_dk;          // step length
        x_k = vectorVectorSum(x_k, vectorScalarMul(d_k, alpha_k));
        A_dk = vectorScalarMul(A_dk, -alpha_k);
        r_k = vectorVectorSum(r_k, A_dk);             // r_{k+1} = r_k - alpha_k * A d_k
        double norm_r_k_1 = vectorVectorDot(r_k, r_k);
        double beta_k_1 = norm_r_k_1 / norm_res;      // direction update coefficient
        d_k = vectorVectorSum(r_k, vectorScalarMul(d_k, beta_k_1));
        norm_res = norm_r_k_1;
        k += 1;
    }
    return std::make_tuple(x_k, k);
}
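/*
 * For reference, the update rules implemented in the loop above (standard CG
 * recurrences; the notation mirrors the local variables, no new quantities introduced):
 *
 *   alpha_k  = (r_k^T r_k) / (d_k^T A d_k)
 *   x_{k+1}  = x_k + alpha_k * d_k
 *   r_{k+1}  = r_k - alpha_k * A d_k
 *   beta_k   = (r_{k+1}^T r_{k+1}) / (r_k^T r_k)
 *   d_{k+1}  = r_{k+1} + beta_k * d_k
 *
 * The loop stops once r_k^T r_k <= tol or max_iter iterations have been performed.
 */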
/**
 * The operation scalar*vector.
 * @param pVector1 Nx1 std::vector<double>
 * @param pScalar double
 * @return mul_vector Nx1 std::vector<double> element-wise product of the scalar and the vector
 */
std::vector<double> CG_Parallel::vectorScalarMul(const std::vector<double> &pVector1, const double pScalar) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n = pVector1.size();
    int batchSize = n / size;
    std::vector<double> mul_vector(n, 0.0);
    std::vector<double> subVec1(batchSize, 0.0);
    std::vector<double> subMul(batchSize, 0.0);
    double scalar = pScalar;
    MPI_Bcast(&scalar, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatter(pVector1.data(), batchSize, MPI_DOUBLE, subVec1.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    for (int i = 0; i < batchSize; i++) {
        subMul[i] = subVec1[i] * scalar;
    }
    // So that everyone gets a copy
    MPI_Allgather(subMul.data(), batchSize, MPI_DOUBLE, mul_vector.data(), batchSize, MPI_DOUBLE, MPI_COMM_WORLD);
    return mul_vector;
}
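/*
 * Note on the parallel layout: the vector kernels in this file all follow the same
 * pattern -- MPI_Scatter an equal block of batchSize = N / size entries to each rank,
 * do the local arithmetic, then MPI_Allgather (or MPI_Allreduce for the dot product)
 * so that every rank ends up with the complete result. This assumes N is exactly
 * divisible by the number of ranks; any remainder elements are silently dropped by
 * the integer division.
 */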
/**
 * The operation vector+vector.
 * @param pVector1 Nx1 std::vector<double>
 * @param pVector2 Nx1 std::vector<double>
 * @return sum_vector Nx1 std::vector<double> element-wise sum of the two vectors
 */
std::vector<double> CG_Parallel::vectorVectorSum(const std::vector<double> &pVector1, const std::vector<double> pVector2) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n = pVector1.size();
    std::vector<double> sum_vector(n, 0.0);
    int batchSize = n / size;
    // Should manage the offset if n is not exactly divisible by size
    // (see the MPI_Scatterv/MPI_Allgatherv sketch after this function).
    // if (n % size) {
    //     batchSize++;
    // }
    std::vector<double> subVec1(batchSize, 0.0);
    std::vector<double> subVec2(batchSize, 0.0);
    std::vector<double> subSum(batchSize, 0.0);
    MPI_Scatter(pVector1.data(), batchSize, MPI_DOUBLE, subVec1.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatter(pVector2.data(), batchSize, MPI_DOUBLE, subVec2.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    for (int i = 0; i < batchSize; i++) {
        subSum[i] = subVec1[i] + subVec2[i];
    }
    // Allgather so everyone has a copy.
    MPI_Allgather(subSum.data(), batchSize, MPI_DOUBLE, sum_vector.data(), batchSize, MPI_DOUBLE, MPI_COMM_WORLD);
    return sum_vector;
}
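/*
 * Illustrative sketch (not part of the original class): one way to lift the
 * divisibility assumption commented on above is MPI_Scatterv/MPI_Allgatherv with
 * per-rank counts and displacements. The helper name vectorVectorSumRagged and its
 * placement as a free function are hypothetical; the original code keeps the simpler
 * equal-block MPI_Scatter/MPI_Allgather pattern.
 */
static std::vector<double> vectorVectorSumRagged(const std::vector<double> &v1,
                                                 const std::vector<double> &v2) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n = static_cast<int>(v1.size());
    // counts[r]: how many entries rank r owns; displs[r]: where its block starts.
    std::vector<int> counts(size), displs(size);
    for (int r = 0; r < size; r++) {
        counts[r] = n / size + (r < n % size ? 1 : 0);
        displs[r] = (r == 0) ? 0 : displs[r - 1] + counts[r - 1];
    }
    std::vector<double> sub1(counts[rank]), sub2(counts[rank]), subSum(counts[rank]);
    MPI_Scatterv(v1.data(), counts.data(), displs.data(), MPI_DOUBLE,
                 sub1.data(), counts[rank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatterv(v2.data(), counts.data(), displs.data(), MPI_DOUBLE,
                 sub2.data(), counts[rank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
    for (int i = 0; i < counts[rank]; i++) {
        subSum[i] = sub1[i] + sub2[i];
    }
    std::vector<double> result(n, 0.0);
    MPI_Allgatherv(subSum.data(), counts[rank], MPI_DOUBLE,
                   result.data(), counts.data(), displs.data(), MPI_DOUBLE, MPI_COMM_WORLD);
    return result;
}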
/**
 * The dot product vector*vector.
 * @param pVector1 Nx1 std::vector<double>
 * @param pVector2 Nx1 std::vector<double>
 * @return double the resulting dot product
 */
double CG_Parallel::vectorVectorDot(const std::vector<double> &pVector1, const std::vector<double> pVector2) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int const batchSize = pVector1.size() / size;
    std::vector<double> subVec1(batchSize, 0.0);
    std::vector<double> subVec2(batchSize, 0.0);
    MPI_Scatter(pVector1.data(), batchSize, MPI_DOUBLE, subVec1.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatter(pVector2.data(), batchSize, MPI_DOUBLE, subVec2.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    double partial_dot_product = 0.0;
    double total_dot_product = 0.0;
    for (int i = 0; i < batchSize; i++) {
        partial_dot_product += subVec1[i] * subVec2[i];
    }
    // We use Allreduce so everyone gets a copy.
    MPI_Allreduce(&partial_dot_product, &total_dot_product, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    return total_dot_product;
}
/**
 * The operation Matrix*Vector. This is the most expensive operation.
 * @param pMatrix NxN matrix std::vector<double>, stored row-major
 * @param pVector Nx1 vector std::vector<double>
 * @return prod_vector Nx1 std::vector<double>
 */
std::vector<double> CG_Parallel::matrixVector(const std::vector<double> &pMatrix, const std::vector<double> pVector) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n = pVector.size();
    int nRows = n / size;
    // Broadcast the full vector from rank 0, then scatter nRows rows of the matrix to each rank.
    std::vector<double> vector = pVector;
    MPI_Bcast(vector.data(), n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    std::vector<double> subMatrix(n * nRows, 0.0);
    MPI_Scatter(pMatrix.data(), n * nRows, MPI_DOUBLE, subMatrix.data(), n * nRows, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    std::vector<double> prod_vector(n, 0.0);
    std::vector<double> subProd(nRows, 0.0);
    // Each rank multiplies its local block of rows with the full vector.
    for (int i = 0; i < nRows; i++) {
        double sum_term = 0.0;
        for (int j = 0; j < n; j++) {
            sum_term += subMatrix[j + n * i] * vector[j];
        }
        subProd[i] = sum_term;
    }
    MPI_Allgather(subProd.data(), nRows, MPI_DOUBLE, prod_vector.data(), nRows, MPI_DOUBLE, MPI_COMM_WORLD);
    return prod_vector;
}
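/*
 * Illustrative sketch (hypothetical helper, not part of the original class): a serial
 * reference product for the same row-major layout, handy for checking the parallel
 * kernel on a single rank. Element A(i, j) is read from pMatrix[i * N + j], matching
 * the indexing subMatrix[j + n * i] used above.
 */
static std::vector<double> matrixVectorSerialReference(const std::vector<double> &pMatrix,
                                                       const std::vector<double> &pVector) {
    int n = static_cast<int>(pVector.size());
    std::vector<double> prod(n, 0.0);
    for (int i = 0; i < n; i++) {
        double sum_term = 0.0;
        for (int j = 0; j < n; j++) {
            sum_term += pMatrix[i * n + j] * pVector[j];
        }
        prod[i] = sum_term;
    }
    return prod;
}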
//-------------------Getters-Setters----------------------
const std::vector<double> &CG_Parallel::getMatrixA() const {
    return matrixA;
}
void CG_Parallel::setMatrixA(const std::vector<double> &matrixA) {
    CG_Parallel::matrixA = matrixA;
}
const std::vector<double> &CG_Parallel::getVectorB() const {
    return vectorB;
}
void CG_Parallel::setVectorB(const std::vector<double> &vectorB) {
    CG_Parallel::vectorB = vectorB;
}
double CG_Parallel::getTol() const {
    return tol;
}
void CG_Parallel::setTol(double tol) {
    CG_Parallel::tol = tol;
}
int CG_Parallel::getMaxIterations() const {
    return max_iterations;
}
void CG_Parallel::setMaxIterations(int maxIterations) {
    max_iterations = maxIterations;
}
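/*
 * Example driver (illustrative sketch only; the project's actual entry point may
 * differ, and the 2x2 system here is made up for demonstration). Every rank calls
 * the constructor and computeCG with the same data, and the rank count should divide
 * N (here 1 or 2), matching the equal-block distribution used by the kernels.
 *
 *   #include <mpi.h>
 *   #include <vector>
 *   #include <iostream>
 *   #include "CG_Parallel.hpp"
 *
 *   int main(int argc, char **argv) {
 *       MPI_Init(&argc, &argv);
 *       std::vector<double> A = {4.0, 1.0,
 *                                1.0, 3.0};          // SPD, row-major
 *       std::vector<double> b = {1.0, 2.0};
 *       std::vector<double> x0 = {0.0, 0.0};
 *       CG_Parallel solver(A, b, 1e-10, 100);
 *       auto result = solver.computeCG(x0);
 *       int rank;
 *       MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 *       if (rank == 0) {
 *           std::vector<double> x = std::get<0>(result);
 *           std::cout << "x = (" << x[0] << ", " << x[1] << ") in "
 *                     << std::get<1>(result) << " iterations\n";
 *       }
 *       MPI_Finalize();
 *       return 0;
 *   }
 */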
