Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F93655152
SparseLinearSystem.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Nov 30, 11:51
Size
12 KB
Mime Type
text/x-c++
Expires
Mon, Dec 2, 11:51 (2 d)
Engine
blob
Format
Raw Data
Handle
22681245
Attached To
rLAMMPS lammps
SparseLinearSystem.hpp
View Options
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef SPARSELINEARSYSTEM_HPP
#define SPARSELINEARSYSTEM_HPP
#include <cmath>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_StaticCrsGraph.hpp>
#include <LinAlgBLAS.hpp>
namespace
Kokkos
{
template
<
typename
ScalarType
,
class
Device
>
struct
CrsMatrix
{
typedef
Device
execution_space
;
typedef
ScalarType
value_type
;
typedef
StaticCrsGraph
<
int
,
execution_space
,
void
,
int
>
graph_type
;
typedef
View
<
value_type
*
,
execution_space
>
coefficients_type
;
graph_type
graph
;
coefficients_type
coefficients
;
};
//----------------------------------------------------------------------------
namespace
Impl
{
template
<
class
Matrix
,
class
OutputVector
,
class
InputVector
>
struct
Multiply
;
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace
Kokkos
{
namespace
Impl
{
template
<
typename
AScalarType
,
typename
VScalarType
,
class
DeviceType
>
struct
Multiply
<
CrsMatrix
<
AScalarType
,
DeviceType
>
,
View
<
VScalarType
*
,
DeviceType
>
,
View
<
VScalarType
*
,
DeviceType
>
>
{
typedef
DeviceType
execution_space
;
typedef
typename
execution_space
::
size_type
size_type
;
typedef
View
<
VScalarType
*
,
execution_space
,
MemoryUnmanaged
>
vector_type
;
typedef
View
<
const
VScalarType
*
,
execution_space
,
MemoryUnmanaged
>
vector_const_type
;
typedef
CrsMatrix
<
AScalarType
,
execution_space
>
matrix_type
;
private
:
matrix_type
m_A
;
vector_const_type
m_x
;
vector_type
m_y
;
public
:
//--------------------------------------------------------------------------
KOKKOS_INLINE_FUNCTION
void
operator
()(
const
size_type
iRow
)
const
{
const
size_type
iEntryBegin
=
m_A
.
graph
.
row_map
[
iRow
];
const
size_type
iEntryEnd
=
m_A
.
graph
.
row_map
[
iRow
+
1
];
double
sum
=
0
;
#if defined( __INTEL_COMPILER )
#pragma simd reduction(+:sum)
#pragma ivdep
for
(
size_type
iEntry
=
iEntryBegin
;
iEntry
<
iEntryEnd
;
++
iEntry
)
{
sum
+=
m_A
.
coefficients
(
iEntry
)
*
m_x
(
m_A
.
graph
.
entries
(
iEntry
)
);
}
#else
for
(
size_type
iEntry
=
iEntryBegin
;
iEntry
<
iEntryEnd
;
++
iEntry
)
{
sum
+=
m_A
.
coefficients
(
iEntry
)
*
m_x
(
m_A
.
graph
.
entries
(
iEntry
)
);
}
#endif
m_y
(
iRow
)
=
sum
;
}
Multiply
(
const
matrix_type
&
A
,
const
size_type
nrow
,
const
size_type
,
// ncol ,
const
vector_type
&
x
,
const
vector_type
&
y
)
:
m_A
(
A
),
m_x
(
x
),
m_y
(
y
)
{
parallel_for
(
nrow
,
*
this
);
}
};
//----------------------------------------------------------------------------
}
// namespace Impl
}
// namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace
Kokkos
{
//----------------------------------------------------------------------------
template
<
typename
AScalarType
,
typename
VScalarType
,
class
Device
>
class
Operator
{
typedef
CrsMatrix
<
AScalarType
,
Device
>
matrix_type
;
typedef
View
<
VScalarType
*
,
Device
>
vector_type
;
private
:
const
CrsMatrix
<
AScalarType
,
Device
>
A
;
ParallelDataMap
data_map
;
AsyncExchange
<
VScalarType
,
Device
,
ParallelDataMap
>
exchange
;
public
:
Operator
(
const
ParallelDataMap
&
arg_data_map
,
const
CrsMatrix
<
AScalarType
,
Device
>
&
arg_A
)
:
A
(
arg_A
)
,
data_map
(
arg_data_map
)
,
exchange
(
arg_data_map
,
1
)
{}
void
apply
(
const
View
<
VScalarType
*
,
Device
>
&
x
,
const
View
<
VScalarType
*
,
Device
>
&
y
)
{
// Gather off-processor data for 'x'
PackArray
<
vector_type
>::
pack
(
exchange
.
buffer
()
,
data_map
.
count_interior
,
data_map
.
count_send
,
x
);
exchange
.
setup
();
// If interior & boundary matrices then could launch interior multiply
exchange
.
send_receive
();
UnpackArray
<
vector_type
>::
unpack
(
x
,
exchange
.
buffer
()
,
data_map
.
count_owned
,
data_map
.
count_receive
);
const
typename
Device
::
size_type
nrow
=
data_map
.
count_owned
;
const
typename
Device
::
size_type
ncol
=
data_map
.
count_owned
+
data_map
.
count_receive
;
Impl
::
Multiply
<
matrix_type
,
vector_type
,
vector_type
>
(
A
,
nrow
,
ncol
,
x
,
y
);
}
};
//----------------------------------------------------------------------------
template
<
typename
AScalarType
,
typename
VScalarType
,
class
Device
>
void
cgsolve
(
const
ParallelDataMap
data_map
,
const
CrsMatrix
<
AScalarType
,
Device
>
A
,
const
View
<
VScalarType
*
,
Device
>
b
,
const
View
<
VScalarType
*
,
Device
>
x
,
size_t
&
iteration
,
double
&
normr
,
double
&
iter_time
,
const
size_t
maximum_iteration
=
200
,
const
double
tolerance
=
std
::
numeric_limits
<
VScalarType
>::
epsilon
()
)
{
typedef
View
<
VScalarType
*
,
Device
>
vector_type
;
//typedef View<VScalarType, Device> value_type ; // unused
const
size_t
count_owned
=
data_map
.
count_owned
;
const
size_t
count_total
=
data_map
.
count_owned
+
data_map
.
count_receive
;
Operator
<
AScalarType
,
VScalarType
,
Device
>
matrix_operator
(
data_map
,
A
);
// Need input vector to matvec to be owned + received
vector_type
pAll
(
"cg::p"
,
count_total
);
vector_type
p
=
Kokkos
::
subview
(
pAll
,
std
::
pair
<
size_t
,
size_t
>
(
0
,
count_owned
)
);
vector_type
r
(
"cg::r"
,
count_owned
);
vector_type
Ap
(
"cg::Ap"
,
count_owned
);
/* r = b - A * x ; */
/* p = x */
deep_copy
(
p
,
x
);
/* Ap = A * p */
matrix_operator
.
apply
(
pAll
,
Ap
);
/* r = b - Ap */
waxpby
(
count_owned
,
1.0
,
b
,
-
1.0
,
Ap
,
r
);
/* p = r */
deep_copy
(
p
,
r
);
double
old_rdot
=
dot
(
count_owned
,
r
,
data_map
.
machine
);
normr
=
sqrt
(
old_rdot
);
iteration
=
0
;
Kokkos
::
Timer
wall_clock
;
while
(
tolerance
<
normr
&&
iteration
<
maximum_iteration
)
{
/* pAp_dot = dot( p , Ap = A * p ) */
/* Ap = A * p */
matrix_operator
.
apply
(
pAll
,
Ap
);
const
double
pAp_dot
=
dot
(
count_owned
,
p
,
Ap
,
data_map
.
machine
);
const
double
alpha
=
old_rdot
/
pAp_dot
;
/* x += alpha * p ; */
axpy
(
count_owned
,
alpha
,
p
,
x
);
/* r -= alpha * Ap ; */
axpy
(
count_owned
,
-
alpha
,
Ap
,
r
);
const
double
r_dot
=
dot
(
count_owned
,
r
,
data_map
.
machine
);
const
double
beta
=
r_dot
/
old_rdot
;
/* p = r + beta * p ; */
xpby
(
count_owned
,
r
,
beta
,
p
);
normr
=
sqrt
(
old_rdot
=
r_dot
);
++
iteration
;
}
iter_time
=
wall_clock
.
seconds
();
}
//----------------------------------------------------------------------------
}
// namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if ( CUDA_VERSION < 6000 )
#pragma message "cusparse_v2.h"
#include <cusparse_v2.h>
#else
#pragma message "cusparse.h"
#include <cusparse.h>
#endif
namespace
Kokkos
{
namespace
Impl
{
struct
CudaSparseSingleton
{
cusparseHandle_t
handle
;
cusparseMatDescr_t
descra
;
CudaSparseSingleton
()
{
cusparseCreate
(
&
handle
);
cusparseCreateMatDescr
(
&
descra
);
cusparseSetMatType
(
descra
,
CUSPARSE_MATRIX_TYPE_GENERAL
);
cusparseSetMatIndexBase
(
descra
,
CUSPARSE_INDEX_BASE_ZERO
);
}
static
CudaSparseSingleton
&
singleton
();
};
template
<>
struct
Multiply
<
CrsMatrix
<
double
,
Cuda
>
,
View
<
double
*
,
Cuda
>
,
View
<
double
*
,
Cuda
>
>
{
typedef
Cuda
execution_space
;
typedef
execution_space
::
size_type
size_type
;
typedef
double
scalar_type
;
typedef
View
<
scalar_type
*
,
execution_space
>
vector_type
;
typedef
CrsMatrix
<
scalar_type
,
execution_space
>
matrix_type
;
public
:
Multiply
(
const
matrix_type
&
A
,
const
size_type
nrow
,
const
size_type
ncol
,
const
vector_type
&
x
,
const
vector_type
&
y
)
{
CudaSparseSingleton
&
s
=
CudaSparseSingleton
::
singleton
();
const
scalar_type
alpha
=
1
,
beta
=
0
;
cusparseStatus_t
status
=
cusparseDcsrmv
(
s
.
handle
,
CUSPARSE_OPERATION_NON_TRANSPOSE
,
nrow
,
ncol
,
A
.
coefficients
.
dimension_0
()
,
&
alpha
,
s
.
descra
,
A
.
coefficients
.
ptr_on_device
()
,
A
.
graph
.
row_map
.
ptr_on_device
()
,
A
.
graph
.
entries
.
ptr_on_device
()
,
x
.
ptr_on_device
()
,
&
beta
,
y
.
ptr_on_device
()
);
if
(
CUSPARSE_STATUS_SUCCESS
!=
status
)
{
throw
std
::
runtime_error
(
std
::
string
(
"ERROR - cusparseDcsrmv "
)
);
}
}
};
template
<>
struct
Multiply
<
CrsMatrix
<
float
,
Cuda
>
,
View
<
float
*
,
Cuda
>
,
View
<
float
*
,
Cuda
>
>
{
typedef
Cuda
execution_space
;
typedef
execution_space
::
size_type
size_type
;
typedef
float
scalar_type
;
typedef
View
<
scalar_type
*
,
execution_space
>
vector_type
;
typedef
CrsMatrix
<
scalar_type
,
execution_space
>
matrix_type
;
public
:
Multiply
(
const
matrix_type
&
A
,
const
size_type
nrow
,
const
size_type
ncol
,
const
vector_type
&
x
,
const
vector_type
&
y
)
{
CudaSparseSingleton
&
s
=
CudaSparseSingleton
::
singleton
();
const
scalar_type
alpha
=
1
,
beta
=
0
;
cusparseStatus_t
status
=
cusparseScsrmv
(
s
.
handle
,
CUSPARSE_OPERATION_NON_TRANSPOSE
,
nrow
,
ncol
,
A
.
coefficients
.
dimension_0
()
,
&
alpha
,
s
.
descra
,
A
.
coefficients
.
ptr_on_device
()
,
A
.
graph
.
row_map
.
ptr_on_device
()
,
A
.
graph
.
entries
.
ptr_on_device
()
,
x
.
ptr_on_device
()
,
&
beta
,
y
.
ptr_on_device
()
);
if
(
CUSPARSE_STATUS_SUCCESS
!=
status
)
{
throw
std
::
runtime_error
(
std
::
string
(
"ERROR - cusparseDcsrmv "
)
);
}
}
};
}
/* namespace Impl */
}
/* namespace Kokkos */
#endif
/* #if defined( KOKKOS_HAVE_CUDA ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif
/* #ifndef SPARSELINEARSYSTEM_HPP */
Event Timeline
Log In to Comment