Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F93932084
SparseLinearSystem.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Dec 2, 14:52
Size
12 KB
Mime Type
text/x-c++
Expires
Wed, Dec 4, 14:52 (2 d)
Engine
blob
Format
Raw Data
Handle
22724511
Attached To
rLAMMPS lammps
SparseLinearSystem.hpp
View Options
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef SPARSELINEARSYSTEM_HPP
#define SPARSELINEARSYSTEM_HPP
#include <cmath>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_StaticCrsGraph.hpp>
#include <LinAlgBLAS.hpp>
namespace Kokkos {
template< typename ScalarType , class Device >
struct CrsMatrix {
typedef Device execution_space ;
typedef ScalarType value_type ;
typedef StaticCrsGraph< int , execution_space , void , int > graph_type ;
typedef View< value_type* , execution_space > coefficients_type ;
graph_type graph ;
coefficients_type coefficients ;
};
//----------------------------------------------------------------------------
namespace Impl {
template< class Matrix , class OutputVector , class InputVector >
struct Multiply ;
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< typename AScalarType ,
typename VScalarType ,
class DeviceType >
struct Multiply< CrsMatrix<AScalarType,DeviceType> ,
View<VScalarType*,DeviceType > ,
View<VScalarType*,DeviceType > >
{
typedef DeviceType execution_space ;
typedef typename execution_space::size_type size_type ;
typedef View< VScalarType*, execution_space, MemoryUnmanaged > vector_type ;
typedef View< const VScalarType*, execution_space, MemoryUnmanaged > vector_const_type ;
typedef CrsMatrix< AScalarType , execution_space > matrix_type ;
private:
matrix_type m_A ;
vector_const_type m_x ;
vector_type m_y ;
public:
//--------------------------------------------------------------------------
KOKKOS_INLINE_FUNCTION
void operator()( const size_type iRow ) const
{
const size_type iEntryBegin = m_A.graph.row_map[iRow];
const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
double sum = 0 ;
#if defined( __INTEL_COMPILER )
#pragma simd reduction(+:sum)
#pragma ivdep
for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
sum += m_A.coefficients(iEntry) * m_x( m_A.graph.entries(iEntry) );
}
#else
for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
sum += m_A.coefficients(iEntry) * m_x( m_A.graph.entries(iEntry) );
}
#endif
m_y(iRow) = sum ;
}
Multiply( const matrix_type & A ,
const size_type nrow ,
const size_type , // ncol ,
const vector_type & x ,
const vector_type & y )
: m_A( A ), m_x( x ), m_y( y )
{
parallel_for( nrow , *this );
}
};
//----------------------------------------------------------------------------
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
//----------------------------------------------------------------------------
template< typename AScalarType ,
typename VScalarType ,
class Device >
class Operator {
typedef CrsMatrix<AScalarType,Device> matrix_type ;
typedef View<VScalarType*,Device> vector_type ;
private:
const CrsMatrix<AScalarType,Device> A ;
ParallelDataMap data_map ;
AsyncExchange< VScalarType , Device , ParallelDataMap > exchange ;
public:
Operator( const ParallelDataMap & arg_data_map ,
const CrsMatrix<AScalarType,Device> & arg_A )
: A( arg_A )
, data_map( arg_data_map )
, exchange( arg_data_map , 1 )
{}
void apply( const View<VScalarType*,Device> & x ,
const View<VScalarType*,Device> & y )
{
// Gather off-processor data for 'x'
PackArray< vector_type >::pack( exchange.buffer() ,
data_map.count_interior ,
data_map.count_send , x );
exchange.setup();
// If interior & boundary matrices then could launch interior multiply
exchange.send_receive();
UnpackArray< vector_type >::unpack( x , exchange.buffer() ,
data_map.count_owned ,
data_map.count_receive );
const typename Device::size_type nrow = data_map.count_owned ;
const typename Device::size_type ncol = data_map.count_owned +
data_map.count_receive ;
Impl::Multiply<matrix_type,vector_type,vector_type>( A, nrow, ncol, x, y);
}
};
//----------------------------------------------------------------------------
template< typename AScalarType , typename VScalarType , class Device >
void cgsolve(
const ParallelDataMap data_map ,
const CrsMatrix<AScalarType,Device> A ,
const View<VScalarType*,Device> b ,
const View<VScalarType*,Device> x ,
size_t & iteration ,
double & normr ,
double & iter_time ,
const size_t maximum_iteration = 200 ,
const double tolerance = std::numeric_limits<VScalarType>::epsilon() )
{
typedef View<VScalarType*,Device> vector_type ;
//typedef View<VScalarType, Device> value_type ; // unused
const size_t count_owned = data_map.count_owned ;
const size_t count_total = data_map.count_owned + data_map.count_receive ;
Operator<AScalarType,VScalarType,Device> matrix_operator( data_map , A );
// Need input vector to matvec to be owned + received
vector_type pAll ( "cg::p" , count_total );
vector_type p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) );
vector_type r ( "cg::r" , count_owned );
vector_type Ap( "cg::Ap", count_owned );
/* r = b - A * x ; */
/* p = x */ deep_copy( p , x );
/* Ap = A * p */ matrix_operator.apply( pAll , Ap );
/* r = b - Ap */ waxpby( count_owned , 1.0 , b , -1.0 , Ap , r );
/* p = r */ deep_copy( p , r );
double old_rdot = dot( count_owned , r , data_map.machine );
normr = sqrt( old_rdot );
iteration = 0 ;
Kokkos::Timer wall_clock ;
while ( tolerance < normr && iteration < maximum_iteration ) {
/* pAp_dot = dot( p , Ap = A * p ) */
/* Ap = A * p */ matrix_operator.apply( pAll , Ap );
const double pAp_dot = dot( count_owned , p , Ap , data_map.machine );
const double alpha = old_rdot / pAp_dot ;
/* x += alpha * p ; */ axpy( count_owned, alpha, p , x );
/* r -= alpha * Ap ; */ axpy( count_owned, -alpha, Ap, r );
const double r_dot = dot( count_owned , r , data_map.machine );
const double beta = r_dot / old_rdot ;
/* p = r + beta * p ; */ xpby( count_owned , r , beta , p );
normr = sqrt( old_rdot = r_dot );
++iteration ;
}
iter_time = wall_clock.seconds();
}
//----------------------------------------------------------------------------
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if ( CUDA_VERSION < 6000 )
#pragma message "cusparse_v2.h"
#include <cusparse_v2.h>
#else
#pragma message "cusparse.h"
#include <cusparse.h>
#endif
namespace Kokkos {
namespace Impl {
struct CudaSparseSingleton {
cusparseHandle_t handle;
cusparseMatDescr_t descra;
CudaSparseSingleton()
{
cusparseCreate( & handle );
cusparseCreateMatDescr( & descra );
cusparseSetMatType( descra , CUSPARSE_MATRIX_TYPE_GENERAL );
cusparseSetMatIndexBase( descra , CUSPARSE_INDEX_BASE_ZERO );
}
static CudaSparseSingleton & singleton();
};
template<>
struct Multiply< CrsMatrix<double,Cuda> ,
View<double*,Cuda > ,
View<double*,Cuda > >
{
typedef Cuda execution_space ;
typedef execution_space::size_type size_type ;
typedef double scalar_type ;
typedef View< scalar_type* , execution_space > vector_type ;
typedef CrsMatrix< scalar_type , execution_space > matrix_type ;
public:
Multiply( const matrix_type & A ,
const size_type nrow ,
const size_type ncol ,
const vector_type & x ,
const vector_type & y )
{
CudaSparseSingleton & s = CudaSparseSingleton::singleton();
const scalar_type alpha = 1 , beta = 0 ;
cusparseStatus_t status =
cusparseDcsrmv( s.handle ,
CUSPARSE_OPERATION_NON_TRANSPOSE ,
nrow , ncol , A.coefficients.dimension_0() ,
&alpha ,
s.descra ,
A.coefficients.ptr_on_device() ,
A.graph.row_map.ptr_on_device() ,
A.graph.entries.ptr_on_device() ,
x.ptr_on_device() ,
&beta ,
y.ptr_on_device() );
if ( CUSPARSE_STATUS_SUCCESS != status ) {
throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
}
}
};
template<>
struct Multiply< CrsMatrix<float,Cuda> ,
View<float*,Cuda > ,
View<float*,Cuda > >
{
typedef Cuda execution_space ;
typedef execution_space::size_type size_type ;
typedef float scalar_type ;
typedef View< scalar_type* , execution_space > vector_type ;
typedef CrsMatrix< scalar_type , execution_space > matrix_type ;
public:
Multiply( const matrix_type & A ,
const size_type nrow ,
const size_type ncol ,
const vector_type & x ,
const vector_type & y )
{
CudaSparseSingleton & s = CudaSparseSingleton::singleton();
const scalar_type alpha = 1 , beta = 0 ;
cusparseStatus_t status =
cusparseScsrmv( s.handle ,
CUSPARSE_OPERATION_NON_TRANSPOSE ,
nrow , ncol , A.coefficients.dimension_0() ,
&alpha ,
s.descra ,
A.coefficients.ptr_on_device() ,
A.graph.row_map.ptr_on_device() ,
A.graph.entries.ptr_on_device() ,
x.ptr_on_device() ,
&beta ,
y.ptr_on_device() );
if ( CUSPARSE_STATUS_SUCCESS != status ) {
throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
}
}
};
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef SPARSELINEARSYSTEM_HPP */
Event Timeline
Log In to Comment