CG_Parallel.cpp

//
// Created by shernand on 29/05/19.
//
/**
 * @file CG_Parallel.cpp
 * @author Sergio Hernandez
 * This file is part of the Conjugate Gradient Project
 *
 * This class implements the parallel (MPI) version of the Conjugate Gradient solver.
 *
 */
#include <ctime>
#include <mpi.h>
#include <cmath>
#include <iostream>
#include "CG_Parallel.hpp"
/**
 * Complete constructor of the CG solver.
 * @param matrixA std::vector<double> NxN symmetric positive definite matrix, stored row-major
 * @param vectorB std::vector<double> Nx1 right-hand side vector
 * @param tol double tolerance for the stopping criterion
 * @param maxIter int maximum number of iterations allowed
 */
CG_Parallel::CG_Parallel(std::vector<double> const &matrixA, std::vector<double> const &vectorB, double const &tol,
                         int const &maxIter) {
    setMatrixA(matrixA);
    setVectorB(vectorB);
    setTol(tol);
    setMaxIterations(maxIter);
    // Local placeholder for timing measurements; it is not stored and has no effect.
    std::vector<double> timings(3, 0.0);
}
/**
 * Runs the CG iteration.
 * @param x_0 Nx1 std::vector<double> initial guess for the solution
 * @return std::tuple<std::vector<double>, int> the Nx1 solution vector and the number of iterations
 * it took to converge.
 */
std::tuple<std::vector<double>,int> CG_Parallel::computeCG(std::vector<double> &x_0) {
    std::vector<double> b = getVectorB();
    std::vector<double> mA = getMatrixA();
    int max_iter = getMaxIterations();
    double tol = getTol();
    std::vector<double> x_k = x_0;
    // Initial residual r_0 = b - A*x_0, which is also the first search direction d_0.
    std::vector<double> r_k = matrixVector(mA, x_k);
    r_k = vectorScalarMul(r_k, -1.0);
    r_k = vectorVectorSum(b, r_k);
    std::vector<double> d_k = r_k;
    // norm_res holds the squared residual norm r_k^T r_k, which is compared against tol.
    double norm_res = vectorVectorDot(r_k, r_k);
    int k = 0;
    while ((norm_res > tol) && (k < max_iter)) {
        std::vector<double> A_dk = matrixVector(mA, d_k);
        double dk_A_dk = vectorVectorDot(d_k, A_dk);
        double alpha_k = norm_res / dk_A_dk;          // step length
        x_k = vectorVectorSum(x_k, vectorScalarMul(d_k, alpha_k));
        A_dk = vectorScalarMul(A_dk, -alpha_k);
        r_k = vectorVectorSum(r_k, A_dk);             // r_{k+1} = r_k - alpha_k * A d_k
        double norm_r_k_1 = vectorVectorDot(r_k, r_k);
        double beta_k_1 = norm_r_k_1 / norm_res;      // direction update coefficient
        d_k = vectorVectorSum(r_k, vectorScalarMul(d_k, beta_k_1));
        norm_res = norm_r_k_1;
        k += 1;
    }
    return std::make_tuple(x_k, k);
}
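/*
 * For reference, the update rules implemented in the loop above (standard CG
 * recurrences; the notation mirrors the local variables, no new quantities introduced):
 *
 *   alpha_k  = (r_k^T r_k) / (d_k^T A d_k)
 *   x_{k+1}  = x_k + alpha_k * d_k
 *   r_{k+1}  = r_k - alpha_k * A d_k
 *   beta_k   = (r_{k+1}^T r_{k+1}) / (r_k^T r_k)
 *   d_{k+1}  = r_{k+1} + beta_k * d_k
 *
 * The loop stops once r_k^T r_k <= tol or max_iter iterations have been performed.
 */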
/**
 * The operation scalar*vector.
 * @param pVector1 Nx1 std::vector<double>
 * @param pScalar double
 * @return mul_vector Nx1 std::vector<double> element-wise product of the scalar and the vector
 */
std::vector<double> CG_Parallel::vectorScalarMul(const std::vector<double> &pVector1, const double pScalar) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n = pVector1.size();
    int batchSize = n / size;
    std::vector<double> mul_vector(n, 0.0);
    std::vector<double> subVec1(batchSize, 0.0);
    std::vector<double> subMul(batchSize, 0.0);
    double scalar = pScalar;
    MPI_Bcast(&scalar, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatter(pVector1.data(), batchSize, MPI_DOUBLE, subVec1.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    for (int i = 0; i < batchSize; i++) {
        subMul[i] = subVec1[i] * scalar;
    }
    // So that everyone gets a copy
    MPI_Allgather(subMul.data(), batchSize, MPI_DOUBLE, mul_vector.data(), batchSize, MPI_DOUBLE, MPI_COMM_WORLD);
    return mul_vector;
}
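/*
 * Note on the parallel layout: the vector kernels in this file all follow the same
 * pattern -- MPI_Scatter an equal block of batchSize = N / size entries to each rank,
 * do the local arithmetic, then MPI_Allgather (or MPI_Allreduce for the dot product)
 * so that every rank ends up with the complete result. This assumes N is exactly
 * divisible by the number of ranks; any remainder elements are silently dropped by
 * the integer division.
 */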
/**
 * The operation vector+vector.
 * @param pVector1 Nx1 std::vector<double>
 * @param pVector2 Nx1 std::vector<double>
 * @return sum_vector Nx1 std::vector<double> element-wise sum of the two vectors
 */
std::vector<double> CG_Parallel::vectorVectorSum(const std::vector<double> &pVector1, const std::vector<double> pVector2) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n = pVector1.size();
    std::vector<double> sum_vector(n, 0.0);
    int batchSize = n / size;
    // Should manage the offset if n is not exactly divisible by size
    // (see the MPI_Scatterv/MPI_Allgatherv sketch after this function).
    // if (n % size) {
    //     batchSize++;
    // }
    std::vector<double> subVec1(batchSize, 0.0);
    std::vector<double> subVec2(batchSize, 0.0);
    std::vector<double> subSum(batchSize, 0.0);
    MPI_Scatter(pVector1.data(), batchSize, MPI_DOUBLE, subVec1.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatter(pVector2.data(), batchSize, MPI_DOUBLE, subVec2.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    for (int i = 0; i < batchSize; i++) {
        subSum[i] = subVec1[i] + subVec2[i];
    }
    // Allgather so everyone has a copy.
    MPI_Allgather(subSum.data(), batchSize, MPI_DOUBLE, sum_vector.data(), batchSize, MPI_DOUBLE, MPI_COMM_WORLD);
    return sum_vector;
}
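/*
 * Illustrative sketch (not part of the original class): one way to lift the
 * divisibility assumption commented on above is MPI_Scatterv/MPI_Allgatherv with
 * per-rank counts and displacements. The helper name vectorVectorSumRagged and its
 * placement as a free function are hypothetical; the original code keeps the simpler
 * equal-block MPI_Scatter/MPI_Allgather pattern.
 */
static std::vector<double> vectorVectorSumRagged(const std::vector<double> &v1,
                                                 const std::vector<double> &v2) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n = static_cast<int>(v1.size());
    // counts[r]: how many entries rank r owns; displs[r]: where its block starts.
    std::vector<int> counts(size), displs(size);
    for (int r = 0; r < size; r++) {
        counts[r] = n / size + (r < n % size ? 1 : 0);
        displs[r] = (r == 0) ? 0 : displs[r - 1] + counts[r - 1];
    }
    std::vector<double> sub1(counts[rank]), sub2(counts[rank]), subSum(counts[rank]);
    MPI_Scatterv(v1.data(), counts.data(), displs.data(), MPI_DOUBLE,
                 sub1.data(), counts[rank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatterv(v2.data(), counts.data(), displs.data(), MPI_DOUBLE,
                 sub2.data(), counts[rank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
    for (int i = 0; i < counts[rank]; i++) {
        subSum[i] = sub1[i] + sub2[i];
    }
    std::vector<double> result(n, 0.0);
    MPI_Allgatherv(subSum.data(), counts[rank], MPI_DOUBLE,
                   result.data(), counts.data(), displs.data(), MPI_DOUBLE, MPI_COMM_WORLD);
    return result;
}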
/**
 * The dot product vector*vector.
 * @param pVector1 Nx1 std::vector<double>
 * @param pVector2 Nx1 std::vector<double>
 * @return double the resulting dot product
 */
double CG_Parallel::vectorVectorDot(const std::vector<double> &pVector1, const std::vector<double> pVector2) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int const batchSize = pVector1.size() / size;
    std::vector<double> subVec1(batchSize, 0.0);
    std::vector<double> subVec2(batchSize, 0.0);
    MPI_Scatter(pVector1.data(), batchSize, MPI_DOUBLE, subVec1.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatter(pVector2.data(), batchSize, MPI_DOUBLE, subVec2.data(), batchSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    double partial_dot_product = 0.0;
    double total_dot_product = 0.0;
    for (int i = 0; i < batchSize; i++) {
        partial_dot_product += subVec1[i] * subVec2[i];
    }
    // We use Allreduce so everyone gets a copy.
    MPI_Allreduce(&partial_dot_product, &total_dot_product, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    return total_dot_product;
}
/**
 * The operation Matrix*Vector. This is the most expensive operation.
 * @param pMatrix NxN matrix std::vector<double>, stored row-major
 * @param pVector Nx1 vector std::vector<double>
 * @return prod_vector Nx1 std::vector<double>
 */
std::vector<double> CG_Parallel::matrixVector(const std::vector<double> &pMatrix, const std::vector<double> pVector) {
    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int n = pVector.size();
    int nRows = n / size;
    // Broadcast the full vector from rank 0, then scatter nRows rows of the matrix to each rank.
    std::vector<double> vector = pVector;
    MPI_Bcast(vector.data(), n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    std::vector<double> subMatrix(n * nRows, 0.0);
    MPI_Scatter(pMatrix.data(), n * nRows, MPI_DOUBLE, subMatrix.data(), n * nRows, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    std::vector<double> prod_vector(n, 0.0);
    std::vector<double> subProd(nRows, 0.0);
    // Each rank multiplies its local block of rows with the full vector.
    for (int i = 0; i < nRows; i++) {
        double sum_term = 0.0;
        for (int j = 0; j < n; j++) {
            sum_term += subMatrix[j + n * i] * vector[j];
        }
        subProd[i] = sum_term;
    }
    MPI_Allgather(subProd.data(), nRows, MPI_DOUBLE, prod_vector.data(), nRows, MPI_DOUBLE, MPI_COMM_WORLD);
    return prod_vector;
}
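/*
 * Illustrative sketch (hypothetical helper, not part of the original class): a serial
 * reference product for the same row-major layout, handy for checking the parallel
 * kernel on a single rank. Element A(i, j) is read from pMatrix[i * N + j], matching
 * the indexing subMatrix[j + n * i] used above.
 */
static std::vector<double> matrixVectorSerialReference(const std::vector<double> &pMatrix,
                                                       const std::vector<double> &pVector) {
    int n = static_cast<int>(pVector.size());
    std::vector<double> prod(n, 0.0);
    for (int i = 0; i < n; i++) {
        double sum_term = 0.0;
        for (int j = 0; j < n; j++) {
            sum_term += pMatrix[i * n + j] * pVector[j];
        }
        prod[i] = sum_term;
    }
    return prod;
}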
//-------------------Getters-Setters----------------------
const std::vector<double> &CG_Parallel::getMatrixA() const {
    return matrixA;
}
void CG_Parallel::setMatrixA(const std::vector<double> &matrixA) {
    CG_Parallel::matrixA = matrixA;
}
const std::vector<double> &CG_Parallel::getVectorB() const {
    return vectorB;
}
void CG_Parallel::setVectorB(const std::vector<double> &vectorB) {
    CG_Parallel::vectorB = vectorB;
}
double CG_Parallel::getTol() const {
    return tol;
}
void CG_Parallel::setTol(double tol) {
    CG_Parallel::tol = tol;
}
int CG_Parallel::getMaxIterations() const {
    return max_iterations;
}
void CG_Parallel::setMaxIterations(int maxIterations) {
    max_iterations = maxIterations;
}
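/*
 * Example driver (illustrative sketch only; the project's actual entry point may
 * differ, and the 2x2 system here is made up for demonstration). Every rank calls
 * the constructor and computeCG with the same data, and the rank count should divide
 * N (here 1 or 2), matching the equal-block distribution used by the kernels.
 *
 *   #include <mpi.h>
 *   #include <vector>
 *   #include <iostream>
 *   #include "CG_Parallel.hpp"
 *
 *   int main(int argc, char **argv) {
 *       MPI_Init(&argc, &argv);
 *       std::vector<double> A = {4.0, 1.0,
 *                                1.0, 3.0};          // SPD, row-major
 *       std::vector<double> b = {1.0, 2.0};
 *       std::vector<double> x0 = {0.0, 0.0};
 *       CG_Parallel solver(A, b, 1e-10, 100);
 *       auto result = solver.computeCG(x0);
 *       int rank;
 *       MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 *       if (rank == 0) {
 *           std::vector<double> x = std::get<0>(result);
 *           std::cout << "x = (" << x[0] << ", " << x[1] << ") in "
 *                     << std::get<1>(result) << " iterations\n";
 *       }
 *       MPI_Finalize();
 *       return 0;
 *   }
 */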
