Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F88557914
ParDenseMatrix.h
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Oct 19, 11:36
Size
4 KB
Mime Type
text/x-c
Expires
Mon, Oct 21, 11:36 (1 d, 22 h)
Engine
blob
Format
Raw Data
Handle
21789784
Attached To
rLAMMPS lammps
ParDenseMatrix.h
View Options
#ifndef PARDENSEMATRIX_H
#define PARDENSEMATRIX_H
#include "MatrixDef.h"
#include "DenseMatrix.h"
#include "DenseVector.h"
#include "MPI_Wrappers.h"
#include "ATC_Error.h"
using ATC::ATC_Error;
#include <algorithm>
#include <sstream>
namespace ATC_matrix {
/**
* @class ParDenseMatrix
* @brief Parallelized version of DenseMatrix class.
*/
template <typename T>
class ParDenseMatrix : public DenseMatrix<T> {
public:
MPI_Comm _comm;
ParDenseMatrix(MPI_Comm comm, INDEX rows=0, INDEX cols=0, bool z=1)
: DenseMatrix<T>(rows, cols, z), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const DenseMatrix<T>& c)
: DenseMatrix<T>(c), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const SparseMatrix<T>& c)
: DenseMatrix<T>(c), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const Matrix<T>& c)
: DenseMatrix<T>(c), _comm(comm) {}
//////////////////////////////////////////////////////////////////////////////
//* performs a matrix-vector multiply
void ParMultMv(const Vector<T> &v,
DenseVector<T> &c, const bool At, T a, T b)
{
// We can't generically support parallel multiplication because the data
// types must be specified when using MPI
MultMv(*this, v, c, At, a, b);
}
};
template<>
class ParDenseMatrix<double> : public DenseMatrix<double> {
public:
MPI_Comm _comm;
ParDenseMatrix(MPI_Comm comm, INDEX rows=0, INDEX cols=0, bool z=1)
: DenseMatrix<double>(rows, cols, z), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const DenseMatrix<double>& c)
: DenseMatrix<double>(c), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const SparseMatrix<double>& c)
: DenseMatrix<double>(c), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const Matrix<double>& c)
: DenseMatrix<double>(c), _comm(comm) {}
void ParMultMv(const Vector<double> &v, DenseVector<double> &c,
const bool At, double a, double b) const
{
// We don't support parallel vec-Mat multiplication yet
if (At) {
MultMv(*this, v, c, At, a, b);
return;
}
const INDEX nRows = this->nRows();
const INDEX nCols = this->nCols();
if (c.size() != nRows) {
c.resize(nRows); // set size of C
c.zero(); // do not add result to C
} else c *= b;
// Determine how many rows will be handled on each processor
int nProcs = MPI_Wrappers::size(_comm);
int myRank = MPI_Wrappers::rank(_comm);
int *majorCounts = new int[nProcs];
int *offsets = new int[nProcs];
#ifdef COL_STORAGE // Column-major storage
int nMajor = nCols;
int nMinor = nRows;
int ParDenseMatrix::*majorField = &ParDenseMatrix::_nCols;
int ParDenseMatrix::*minorField = &ParDenseMatrix::_nRows;
#else // Row-major storage
int nMajor = nRows;
int nMinor = nCols;
int ParDenseMatrix::*majorField = &ParDenseMatrix::_nRows;
int ParDenseMatrix::*minorField = &ParDenseMatrix::_nCols;
#endif
for (int i = 0; i < nProcs; i++) {
// If we have an uneven row-or-col/processors number, or too few rows
// or cols, some processors will need to receive fewer rows/cols.
offsets[i] = (i * nMajor) / nProcs;
majorCounts[i] = (((i + 1) * nMajor) / nProcs) - offsets[i];
}
int myNMajor = majorCounts[myRank];
int myMajorOffset = offsets[myRank];
// Take data from an offset version of A
ParDenseMatrix<double> A_local(_comm);
A_local._data = this->_data + myMajorOffset * nMinor;
A_local.*majorField = myNMajor;
A_local.*minorField = nMinor;
#ifdef COL_STORAGE // Column-major storage
// When splitting by columns, we split the vector as well, and sum the
// results.
DenseVector<double> v_local(myNMajor);
for (int i = 0; i < myNMajor; i++)
v_local(i) = v(myMajorOffset + i);
// Store results in a local vector
DenseVector<double> c_local = A_local * v_local;
// Sum all vectors onto each processor
MPI_Wrappers::allsum(_comm, c_local.ptr(), c.ptr(), c_local.size());
#else // Row-major storage
// When splitting by rows, we use the whole vector and concatenate the
// results.
// Store results in a small local vector
DenseVector<double> c_local(myNMajor);
for (int i = 0; i < myNMajor; i++)
c_local(i) = c(myMajorOffset + i);
MultMv(A_local, v, c_local, At, a, b);
// Gather the results onto each processor
allgatherv(_comm, c_local.ptr(), c_local.size(), c.ptr(),
majorCounts, offsets);
#endif
// Clear out the local matrix's pointer so we don't double-free
A_local._data = NULL;
delete [] majorCounts;
delete [] offsets;
}
};
// Operator for dense Matrix - dense vector product
template<typename T>
DenseVector<T> operator*(const ParDenseMatrix<T> &A, const Vector<T> &b)
{
DenseVector<T> c;
A.ParMultMv(b, c, 0, 1.0, 0.0);
return c;
}
} // end namespace
#endif
Event Timeline
Log In to Comment