Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F80691724
cudpp_plan.h
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Sep 2, 03:00
Size
7 KB
Mime Type
text/x-c++
Expires
Wed, Sep 4, 03:00 (1 d, 21 h)
Engine
blob
Format
Raw Data
Handle
20009263
Attached To
rLAMMPS lammps
cudpp_plan.h
View Options
// -------------------------------------------------------------
// CUDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision: 3572$
// $Date$
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt
// in the root directory of this source distribution.
// -------------------------------------------------------------
#ifndef __CUDPP_PLAN_H__
#define __CUDPP_PLAN_H__
typedef
void
*
KernelPointer
;
extern
"C"
size_t
getNumCTAs
(
KernelPointer
kernel
);
extern
"C"
void
compNumCTAs
(
KernelPointer
kernel
,
size_t
bytesDynamicSharedMem
,
size_t
threadsPerBlock
);
template
<
typename
T
>
size_t
numCTAs
(
T
kernel
)
{
return
getNumCTAs
((
KernelPointer
)
kernel
);
}
template
<
typename
T
>
void
computeNumCTAs
(
T
kernel
,
unsigned
int
bytesDynamicSharedMem
,
size_t
threadsPerBlock
)
{
compNumCTAs
((
KernelPointer
)
kernel
,
bytesDynamicSharedMem
,
threadsPerBlock
);
}
/** @brief Base class for CUDPP Plan data structures
*
* CUDPPPlan and its subclasses provide the internal (i.e. not visible to the
* library user) infrastructure for planning algorithm execution. They
* own intermediate storage for CUDPP algorithms as well as, in some cases,
* information about optimal execution configuration for the present hardware.
*
*/
class
CUDPPPlan
{
public:
CUDPPPlan
(
CUDPPConfiguration
config
,
size_t
numElements
,
size_t
numRows
,
size_t
rowPitch
);
virtual
~
CUDPPPlan
()
{}
// Note anything passed to functions compiled by NVCC must be public
CUDPPConfiguration
m_config
;
//!< @internal Options structure
size_t
m_numElements
;
//!< @internal Maximum number of input elements
size_t
m_numRows
;
//!< @internal Maximum number of input rows
size_t
m_rowPitch
;
//!< @internal Pitch of input rows in elements
};
/** @brief Plan class for scan algorithm
*
*/
class
CUDPPScanPlan
:
public
CUDPPPlan
{
public:
CUDPPScanPlan
(
CUDPPConfiguration
config
,
size_t
numElements
,
size_t
numRows
,
size_t
rowPitch
);
virtual
~
CUDPPScanPlan
();
void
**
m_blockSums
;
//!< @internal Intermediate block sums array
size_t
*
m_rowPitches
;
//!< @internal Pitch of each row in elements (for cudppMultiScan())
size_t
m_numEltsAllocated
;
//!< @internal Number of elements allocated (maximum scan size)
size_t
m_numRowsAllocated
;
//!< @internal Number of rows allocated (for cudppMultiScan())
size_t
m_numLevelsAllocated
;
//!< @internal Number of levels allocaed (in _scanBlockSums)
};
/** @brief Plan class for segmented scan algorithm
*
*/
class
CUDPPSegmentedScanPlan
:
public
CUDPPPlan
{
public:
CUDPPSegmentedScanPlan
(
CUDPPConfiguration
config
,
size_t
numElements
);
virtual
~
CUDPPSegmentedScanPlan
();
void
**
m_blockSums
;
//!< @internal Intermediate block sums array
unsigned
int
**
m_blockFlags
;
//!< @internal Intermediate block flags array
unsigned
int
**
m_blockIndices
;
//!< @internal Intermediate block indices array
size_t
m_numEltsAllocated
;
//!< @internal Number of elements allocated (maximum scan size)
size_t
m_numLevelsAllocated
;
//!< @internal Number of levels allocaed (in _scanBlockSums)
};
/** @brief Plan class for compact algorithm
*
*/
class
CUDPPCompactPlan
:
public
CUDPPPlan
{
public:
CUDPPCompactPlan
(
CUDPPConfiguration
config
,
size_t
numElements
,
size_t
numRows
,
size_t
rowPitch
);
virtual
~
CUDPPCompactPlan
();
CUDPPScanPlan
*
m_scanPlan
;
//!< @internal Compact performs a scan of type unsigned int using this plan
unsigned
int
*
m_d_outputIndices
;
//!< @internal Output address of compacted elements; this is the result of scan
};
class
CUDPPRadixSortPlan
:
public
CUDPPPlan
{
public:
CUDPPRadixSortPlan
(
CUDPPConfiguration
config
,
size_t
numElements
);
virtual
~
CUDPPRadixSortPlan
();
bool
m_bKeysOnly
;
bool
m_bManualCoalesce
;
bool
m_bUsePersistentCTAs
;
unsigned
int
m_persistentCTAThreshold
[
2
];
unsigned
int
m_persistentCTAThresholdFullBlocks
[
2
];
CUDPPScanPlan
*
m_scanPlan
;
//!< @internal Sort performs a scan of type unsigned int using this plan
unsigned
int
m_keyBits
;
mutable
void
*
m_tempKeys
;
//!< @internal Intermediate storage for keys
mutable
void
*
m_tempValues
;
//!< @internal Intermediate storage for values
unsigned
int
*
m_counters
;
//!< @internal Counter for each radix
unsigned
int
*
m_countersSum
;
//!< @internal Prefix sum of radix counters
unsigned
int
*
m_blockOffsets
;
//!< @internal Global offsets of each radix in each block
};
/** @brief Plan class for sparse-matrix dense-vector multiply
*
*/
class
CUDPPSparseMatrixVectorMultiplyPlan
:
public
CUDPPPlan
{
public:
CUDPPSparseMatrixVectorMultiplyPlan
(
CUDPPConfiguration
config
,
size_t
numNZElts
,
const
void
*
A
,
const
unsigned
int
*
rowindx
,
const
unsigned
int
*
indx
,
size_t
numRows
);
virtual
~
CUDPPSparseMatrixVectorMultiplyPlan
();
CUDPPSegmentedScanPlan
*
m_segmentedScanPlan
;
//!< @internal Performs a segmented scan of type T using this plan
void
*
m_d_prod
;
//!< @internal Vector of products (of an element in A and its corresponding (thats is
//! belongs to the same row) element in x; this is the input and output of
//! segmented scan
unsigned
int
*
m_d_flags
;
//!< @internal Vector of flags where a flag is set if an element of A is the first element
//! of its row; this is the flags vector for segmented scan
unsigned
int
*
m_d_rowFinalIndex
;
//!< @internal Vector of row end indices, which for each row specifies an index in A
//! which is the last element of that row. Resides in GPU memory.
unsigned
int
*
m_d_rowIndex
;
//!< @internal Vector of row end indices, which for each row specifies an index in A
//! which is the first element of that row. Resides in GPU memory.
unsigned
int
*
m_d_index
;
//!<@internal Vector of column numbers one for each element in A
void
*
m_d_A
;
//!<@internal The A matrix
unsigned
int
*
m_rowFinalIndex
;
//!< @internal Vector of row end indices, which for each row specifies an index in A
//! which is the last element of that row. Resides in CPU memory.
size_t
m_numRows
;
//!< Number of rows
size_t
m_numNonZeroElements
;
//!<Number of non-zero elements
};
/** @brief Plan class for random number generator
*
*/
class
CUDPPRandPlan
:
public
CUDPPPlan
{
public:
CUDPPRandPlan
(
CUDPPConfiguration
config
,
size_t
num_elements
);
unsigned
int
m_seed
;
//!< @internal the seed for the random number generator
};
#endif
// __CUDPP_PLAN_H__
Event Timeline
Log In to Comment