CG_Parallel.cpp (attached to R8929 Conjugate Gradient Solver)
//
// Created by shernand on 29/05/19.
//
/**
 * @file CG_Parallel.cpp
 * @author Sergio Hernandez
 * This file is part of the Conjugate Gradient Project
 *
 * This class implements the parallel (MPI) version of the Conjugate Gradient solver.
 *
 */
#include <ctime>
#include <mpi.h>
#include <cmath>
#include <iostream>
#include "CG_Parallel.hpp"
/**
 * Complete constructor of the CG solver.
 * @param matrixA NxN symmetric positive definite matrix, stored row-major as a flat std::vector<double>
 * @param vectorB Nx1 std::vector<double> right-hand side vector
 * @param tol double Tolerance for the stopping criterion
 * @param maxIter int Maximum number of iterations allowed by the stopping criterion
 */
CG_Parallel::CG_Parallel(std::vector<double> const &matrixA, std::vector<double> const &vectorB, double const &tol,
                         int const &maxIter) {
    setMatrixA(matrixA);
    setVectorB(vectorB);
    setTol(tol);
    setMaxIterations(maxIter);
    // Note: this local vector is discarded when the constructor returns;
    // it has no effect on the solver state.
    std::vector<double> timings(3, 0.0);
}
/**
 * Computes the CG solution.
 * @param x_0 Nx1 std::vector<double> initial guess for the solution
 * @return std::tuple<std::vector<double>, int> the Nx1 solution vector and the number of
 * iterations it took to converge.
 */
std::tuple<std::vector<double>,int> CG_Parallel::computeCG(std::vector<double> &x_0) {
    std::vector<double> b = getVectorB();
    std::vector<double> mA = getMatrixA();
    int max_iter = getMaxIterations();
    double tol = getTol();
    std::vector<double> x_k = x_0;
    // r_0 = b - A*x_0
    std::vector<double> r_k = matrixVector(mA, x_k);
    r_k = vectorScalarMul(r_k, -1.0);
    r_k = vectorVectorSum(b, r_k);
    std::vector<double> d_k = r_k;
    double norm_res = vectorVectorDot(r_k, r_k);
    int k = 0;
    while ((norm_res > tol) && (k < max_iter)) {
        std::vector<double> A_dk = matrixVector(mA, d_k);
        double dk_A_dk = vectorVectorDot(d_k, A_dk);
        // alpha_k = (r_k' r_k) / (d_k' A d_k)
        double alpha_k = norm_res / dk_A_dk;
        // x_{k+1} = x_k + alpha_k d_k
        x_k = vectorVectorSum(x_k, vectorScalarMul(d_k, alpha_k));
        // r_{k+1} = r_k - alpha_k A d_k
        A_dk = vectorScalarMul(A_dk, -alpha_k);
        r_k = vectorVectorSum(r_k, A_dk);
        double norm_r_k_1 = vectorVectorDot(r_k, r_k);
        // beta_k = (r_{k+1}' r_{k+1}) / (r_k' r_k)
        double beta_k_1 = norm_r_k_1 / norm_res;
        // d_{k+1} = r_{k+1} + beta_k d_k
        d_k = vectorVectorSum(r_k, vectorScalarMul(d_k, beta_k_1));
        norm_res = norm_r_k_1;
        k += 1;
    }
    return std::make_tuple(x_k, k);
}
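/*
 * A minimal driver sketch (not part of this file's interface): it assumes
 * CG_Parallel.hpp declares the constructor and computeCG as used above, that
 * the matrix is stored row-major in a flat std::vector<double>, and that the
 * program is launched with a rank count that divides N (here 1 or 2).
 *
 *   #include <mpi.h>
 *   #include <tuple>
 *   #include <vector>
 *   #include <iostream>
 *   #include "CG_Parallel.hpp"
 *
 *   int main(int argc, char **argv) {
 *       MPI_Init(&argc, &argv);
 *       int rank;
 *       MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 *       // 2x2 SPD system: A = [4 1; 1 3], b = (1, 2)
 *       std::vector<double> A = {4.0, 1.0, 1.0, 3.0};
 *       std::vector<double> b = {1.0, 2.0};
 *       std::vector<double> x0 = {0.0, 0.0};
 *       CG_Parallel solver(A, b, 1e-10, 100);
 *       std::vector<double> x;
 *       int iterations;
 *       std::tie(x, iterations) = solver.computeCG(x0);
 *       if (rank == 0) {
 *           std::cout << "x = (" << x[0] << ", " << x[1] << ") after "
 *                     << iterations << " iterations" << std::endl;
 *       }
 *       MPI_Finalize();
 *       return 0;
 *   }
 */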
/**
 * The operation scalar*vector
 * @param pVector1 Nx1 std::vector<double>
 * @param pScalar double
 * @return mul_vector Nx1 std::vector<double>, the element-wise product of the scalar and the vector
 */
std::vector<double> CG_Parallel::vectorScalarMul(const std::vector<double> &pVector1, const double pScalar) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n = pVector1.size();
    int batchSize = n / size;
    std::vector<double> mul_vector(n, 0.0);
    std::vector<double> subVec1(batchSize, 0.0);
    std::vector<double> subMul(batchSize, 0.0);
    double scalar = pScalar;
    MPI_Bcast(&scalar, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatter(pVector1.data(), batchSize, MPI_DOUBLE, subVec1.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    for (int i = 0; i < batchSize; i++) {
        subMul[i] = subVec1[i] * scalar;
    }
    // Allgather so that everyone gets a copy
    MPI_Allgather(subMul.data(), batchSize, MPI_DOUBLE, mul_vector.data(), batchSize, MPI_DOUBLE, MPI_COMM_WORLD);
    return mul_vector;
}
/**
 * The operation vector+vector
 * @param pVector1 Nx1 std::vector<double>
 * @param pVector2 Nx1 std::vector<double>
 * @return sum_vector Nx1 std::vector<double>, the element-wise sum of the two vectors
 */
std::vector<double> CG_Parallel::vectorVectorSum(const std::vector<double> &pVector1, const std::vector<double> pVector2) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n = pVector1.size();
    std::vector<double> sum_vector(n, 0.0);
    int batchSize = n / size;
    // Should manage the offset if n is not exactly divisible by size
    // (see the sketch after this function).
    // if (n % size) {
    //     batchSize++;
    // }
    std::vector<double> subVec1(batchSize, 0.0);
    std::vector<double> subVec2(batchSize, 0.0);
    std::vector<double> subSum(batchSize, 0.0);
    MPI_Scatter(pVector1.data(), batchSize, MPI_DOUBLE, subVec1.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatter(pVector2.data(), batchSize, MPI_DOUBLE, subVec2.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    for (int i = 0; i < batchSize; i++) {
        subSum[i] = subVec1[i] + subVec2[i];
    }
    // Allgather so everyone has a copy.
    MPI_Allgather(subSum.data(), batchSize, MPI_DOUBLE, sum_vector.data(), batchSize, MPI_DOUBLE, MPI_COMM_WORLD);
    return sum_vector;
}
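/*
 * Handling a vector length that is not divisible by the number of ranks is
 * left open above. A possible sketch using MPI_Scatterv/MPI_Allgatherv
 * (the counts/displs arrays and the helper name are illustrative, not part
 * of this class):
 *
 *   std::vector<double> vectorVectorSumUneven(const std::vector<double> &v1,
 *                                             const std::vector<double> &v2) {
 *       int size, rank;
 *       MPI_Comm_size(MPI_COMM_WORLD, &size);
 *       MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 *       int n = static_cast<int>(v1.size());
 *       // Per-rank chunk sizes and offsets; the first n % size ranks get one extra entry.
 *       std::vector<int> counts(size), displs(size);
 *       for (int r = 0; r < size; r++) {
 *           counts[r] = n / size + (r < n % size ? 1 : 0);
 *           displs[r] = (r == 0) ? 0 : displs[r - 1] + counts[r - 1];
 *       }
 *       std::vector<double> sub1(counts[rank]), sub2(counts[rank]), subSum(counts[rank]);
 *       MPI_Scatterv(v1.data(), counts.data(), displs.data(), MPI_DOUBLE,
 *                    sub1.data(), counts[rank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
 *       MPI_Scatterv(v2.data(), counts.data(), displs.data(), MPI_DOUBLE,
 *                    sub2.data(), counts[rank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
 *       for (int i = 0; i < counts[rank]; i++) subSum[i] = sub1[i] + sub2[i];
 *       std::vector<double> result(n, 0.0);
 *       MPI_Allgatherv(subSum.data(), counts[rank], MPI_DOUBLE,
 *                      result.data(), counts.data(), displs.data(), MPI_DOUBLE,
 *                      MPI_COMM_WORLD);
 *       return result;
 *   }
 */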
/**
* The operation vector*vector
* @param pVector1 Nx1 std::vector<double>
* @param pVector2 Nx1 std::vector<double>
* @return double the resulting dot product
*/
double CG_Parallel::vectorVectorDot(const std::vector<double> &pVector1, const std::vector<double> pVector2) {
    int size, rank;
    MPI_Status status;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int const batchSize = pVector1.size() / size;
    std::vector<double> subVec1(batchSize, 0.0);
    std::vector<double> subVec2(batchSize, 0.0);
    MPI_Scatter(pVector1.data(), batchSize, MPI_DOUBLE, subVec1.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatter(pVector2.data(), batchSize, MPI_DOUBLE, subVec2.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    double partial_dot_product = 0.0;
    double total_dot_product = 0.0;
    for (int i = 0; i < batchSize; i++) {
        partial_dot_product += subVec1[i] * subVec2[i];
    }
    // We use Allreduce so everyone gets a copy.
    MPI_Allreduce(&partial_dot_product, &total_dot_product, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    return total_dot_product;
}
/**
 * The operation Matrix*Vector. This is the most expensive operation.
 * @param pMatrix NxN matrix std::vector<double>
 * @param pVector Nx1 vector std::vector<double>
 * @return prod_vector Nx1 std::vector<double>
 */
std::vector<double> CG_Parallel::matrixVector(const std::vector<double> &pMatrix, const std::vector<double> pVector) {
    int size, rank;
    MPI_Status status;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n = pVector.size();
    int nRows = n / size;
    std::vector<double> vector = pVector;
    MPI_Bcast(vector.data(), n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    // Each rank receives a contiguous block of nRows rows of the row-major matrix.
    std::vector<double> subMatrix(n * nRows, 0.0);
    MPI_Scatter(pMatrix.data(), n * nRows, MPI_DOUBLE, subMatrix.data(), n * nRows, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    std::vector<double> prod_vector(n, 0.0);
    std::vector<double> subProd(nRows, 0.0);
    for (int i = 0; i < nRows; i++) {
        double sum_term = 0.0;
        for (int j = 0; j < vector.size(); j++) {
            sum_term += subMatrix[j + vector.size() * i] * vector[j];
        }
        subProd[i] = sum_term;
    }
    MPI_Allgather(subProd.data(), nRows, MPI_DOUBLE, prod_vector.data(), nRows, MPI_DOUBLE, MPI_COMM_WORLD);
    return prod_vector;
}
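/*
 * For checking matrixVector on small problems, a plain serial reference
 * product can be used (a sketch; it assumes the same row-major flat storage
 * used above and is not part of this class):
 *
 *   std::vector<double> serialMatVec(const std::vector<double> &A,
 *                                    const std::vector<double> &x) {
 *       const int n = static_cast<int>(x.size());
 *       std::vector<double> y(n, 0.0);
 *       for (int i = 0; i < n; i++) {
 *           for (int j = 0; j < n; j++) {
 *               y[i] += A[i * n + j] * x[j];
 *           }
 *       }
 *       return y;
 *   }
 */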
//-------------------Getters-Setters----------------------
const std::vector<double> &CG_Parallel::getMatrixA() const {
    return matrixA;
}
void CG_Parallel::setMatrixA(const std::vector<double> &matrixA) {
    CG_Parallel::matrixA = matrixA;
}
const std::vector<double> &CG_Parallel::getVectorB() const {
    return vectorB;
}
void CG_Parallel::setVectorB(const std::vector<double> &vectorB) {
    CG_Parallel::vectorB = vectorB;
}
double CG_Parallel::getTol() const {
    return tol;
}
void CG_Parallel::setTol(double tol) {
    CG_Parallel::tol = tol;
}
int CG_Parallel::getMaxIterations() const {
    return max_iterations;
}
void CG_Parallel::setMaxIterations(int maxIterations) {
    max_iterations = maxIterations;
}