Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F87265119
ucl_matrix.h
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Oct 11, 15:31
Size
10 KB
Mime Type
text/x-c++
Expires
Sun, Oct 13, 15:31 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
21566021
Attached To
rLAMMPS lammps
ucl_matrix.h
View Options
/***************************************************************************
ucl_matrix.h
-------------------
W. Michael Brown
Matrix Container on Host
__________________________________________________________________________
This file is part of the Geryon Unified Coprocessor Library (UCL)
__________________________________________________________________________
begin : Thu May 10 2012
copyright : (C) 2012 by W. Michael Brown
email : brownw@ornl.gov
***************************************************************************/
/* -----------------------------------------------------------------------
This software is distributed under the Simplified BSD License.
----------------------------------------------------------------------- */
// Only allow this file to be included by CUDA and OpenCL specific headers
#ifdef _UCL_MAT_ALLOW
/// Matrix S-Object
template
<
class
hosttype
,
class
devtype
>
class
UCL_Matrix
{
public:
// Traits for copying data
// MEM_TYPE is 0 for device, 1 for host, and 2 for image
enum
traits
{
DATA_TYPE
=
_UCL_DATA_ID
<
hosttype
>::
id
,
MEM_TYPE
=
1
,
PADDED
=
0
,
ROW_MAJOR
=
1
,
VECTOR
=
0
};
typedef
hosttype
data_type
;
/// Host Allocation
UCL_H_Mat
<
hosttype
>
host
;
/// Device Allocation
UCL_D_Mat
<
devtype
>
device
;
UCL_Matrix
()
{
}
~
UCL_Matrix
()
{
}
/// Construct with specied number of rows and columns
/** \sa alloc() **/
UCL_Matrix
(
const
size_t
rows
,
const
size_t
cols
,
UCL_Device
&
acc
,
const
enum
UCL_MEMOPT
kind1
=
UCL_READ_WRITE
,
const
enum
UCL_MEMOPT
kind2
=
UCL_READ_WRITE
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
alloc
(
host
,
device
,
_buffer
,
rows
,
cols
,
acc
,
kind1
,
kind2
);
}
/// Set up host matrix with specied # of rows/cols and reserve memory
/** The kind1 parameter controls memory access from the host
* - UCL_READ_WRITE - Specify that you will read and write from host
* - UCL_WRITE_ONLY - Specify that you will only write from host
* - UCL_READ_ONLY - Specify that you will only read from host
* - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
* The kind2 parameter controls memory optimizations from the device:
* - UCL_READ_WRITE - Specify that you will read and write in kernels
* - UCL_WRITE_ONLY - Specify that you will only write in kernels
* - UCL_READ_ONLY - Specify that you will only read in kernels
* \note When passing a command queue instead of a device, the device
* allocation is always performed. Even if the device shares memory
* with the host.
* \param cq Default command queue for operations copied from another mat
* \return UCL_SUCCESS if the memory allocation is successful **/
template
<
class
mat_type
>
inline
int
alloc
(
const
size_t
rows
,
const
size_t
cols
,
mat_type
&
cq
,
const
enum
UCL_MEMOPT
kind1
=
UCL_READ_WRITE
,
const
enum
UCL_MEMOPT
kind2
=
UCL_READ_WRITE
)
{
return
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
alloc
(
host
,
device
,
_buffer
,
rows
,
cols
,
cq
,
kind1
,
kind2
);
}
/// Set up host matrix with specied # of rows/cols and reserve memory
/** The kind1 parameter controls memory access from the host
* - UCL_READ_WRITE - Specify that you will read and write from host
* - UCL_WRITE_ONLY - Specify that you will only write from host
* - UCL_READ_ONLY - Specify that you will only read from host
* - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
* The kind2 parameter controls memory optimizations from the device:
* - UCL_READ_WRITE - Specify that you will read and write in kernels
* - UCL_WRITE_ONLY - Specify that you will only write in kernels
* - UCL_READ_ONLY - Specify that you will only read in kernels
* \param device Used to get the default command queue for operations
* \return UCL_SUCCESS if the memory allocation is successful **/
inline
int
alloc
(
const
size_t
rows
,
const
size_t
cols
,
UCL_Device
&
acc
,
const
enum
UCL_MEMOPT
kind1
=
UCL_READ_WRITE
,
const
enum
UCL_MEMOPT
kind2
=
UCL_READ_WRITE
)
{
return
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
alloc
(
host
,
device
,
_buffer
,
rows
,
cols
,
acc
,
kind1
,
kind2
);
}
/// Free memory and set size to 0
inline
void
clear
()
{
host
.
clear
();
device
.
clear
();
}
/// Resize the allocation to contain cols elements
inline
int
resize
(
const
int
rows
,
const
int
cols
)
{
assert
(
host
.
kind
()
!=
UCL_VIEW
);
int
err
=
host
.
resize
(
rows
,
cols
);
if
(
err
!=
UCL_SUCCESS
)
return
err
;
return
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
dev_resize
(
device
,
host
,
_buffer
,
rows
,
cols
);
}
/// Resize (only if bigger) the allocation to contain cols elements
inline
int
resize_ib
(
const
int
new_rows
,
const
int
new_cols
)
{
if
(
new_rows
>
rows
()
||
new_cols
>
cols
())
return
resize
(
new_rows
,
new_cols
);
else
return
UCL_SUCCESS
;
}
/// Set each element to zero (asynchronously on device)
inline
void
zero
()
{
zero
(
cq
());
}
/// Set first n elements to zero (asynchronously on device)
inline
void
zero
(
const
int
n
)
{
zero
(
n
,
cq
());
}
/// Set each element to zero (asynchronously on device)
inline
void
zero
(
command_queue
&
cq
)
{
host
.
zero
();
if
(
device
.
kind
()
!=
UCL_VIEW
)
device
.
zero
(
cq
);
else
if
(
_buffer
.
numel
()
>
0
)
_buffer
.
zero
();
}
/// Set first n elements to zero (asynchronously on device)
inline
void
zero
(
const
int
n
,
command_queue
&
cq
)
{
host
.
zero
(
n
);
if
(
device
.
kind
()
!=
UCL_VIEW
)
device
.
zero
(
n
,
cq
);
else
if
(
_buffer
.
numel
()
>
0
)
_buffer
.
zero
();
}
/// Get the number of elements
inline
size_t
numel
()
const
{
return
host
.
numel
();
}
/// Get the number of rows
inline
size_t
rows
()
const
{
return
host
.
rows
();
}
/// Get the number of columns
inline
size_t
cols
()
const
{
return
host
.
cols
();
}
/// Get the memory usage (bytes) of the s-object (including any buffers)
inline
size_t
host_mem_usage
()
{
return
host
.
row_bytes
()
*
host
.
rows
()
+
_buffer
.
row_bytes
()
*
_buffer
.
rows
();
}
/// Get the memory usage (bytes) of the s-object (including any buffers)
inline
size_t
device_mem_usage
()
{
return
device
.
row_bytes
()
*
device
.
rows
();
}
/// Get element at index i
inline
hosttype
&
operator
[](
const
int
i
)
{
return
host
[
i
];
}
/// Get element at index i
inline
const
hosttype
&
operator
[](
const
int
i
)
const
{
return
host
[
i
];
}
/// 2D access (row should always be 0)
inline
hosttype
&
operator
()(
const
int
row
,
const
int
col
)
{
return
host
(
row
,
col
);
}
/// 2D access (row should always be 0)
inline
const
hosttype
&
operator
()(
const
int
row
,
const
int
col
)
const
{
return
host
(
row
,
col
);
}
/// Returns pointer to memory pointer for allocation on host
inline
hosttype
**
host_ptr
()
{
return
host
.
host_ptr
();
}
/// Return the default command queue/stream associated with this data
inline
command_queue
&
cq
()
{
return
host
.
cq
();
}
/// Change the default command queue associated with this data
inline
void
cq
(
command_queue
&
cq_in
)
{
host
.
cq
(
cq_in
);
device
.
cq
(
cq_in
);
}
/// Block until command_queue associated with matrix is complete
inline
void
sync
()
{
host
.
sync
();
}
///Get the size of a row on the host (including any padding) in elements
inline
size_t
row_size
()
const
{
return
host
.
row_size
();
}
/// Get the size of a row on the host(including any padding) in bytes
inline
size_t
row_bytes
()
const
{
return
host
.
row_bytes
();
}
/// Get the size on the host in bytes of 1 element
inline
int
element_size
()
const
{
return
sizeof
(
hosttype
);
}
/// Update the allocation on the host asynchronously
inline
void
update_host
()
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
host
,
device
,
_buffer
,
true
);
}
/// Update the allocation on the host (true for asynchronous copy)
inline
void
update_host
(
const
bool
async
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
host
,
device
,
_buffer
,
async
);
}
/// Update the allocation on the host (using command queue)
inline
void
update_host
(
command_queue
&
cq
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
host
,
device
,
_buffer
,
cq
);
}
/// Update the first n elements on the host (true for asynchronous copy)
inline
void
update_host
(
const
int
n
,
const
bool
async
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
host
,
device
,
n
,
_buffer
,
async
);
}
/// Update the first n elements on the host (using command queue)
inline
void
update_host
(
const
int
n
,
command_queue
&
cq
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
host
,
device
,
n
,
_buffer
,
cq
);
}
/// Update slice on the host (true for asynchronous copy)
inline
void
update_host
(
const
int
rows
,
const
int
cols
,
const
bool
async
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
host
,
device
,
rows
,
cols
,
_buffer
,
async
);
}
/// Update slice on the host (using command queue)
inline
void
update_host
(
const
int
rows
,
const
int
cols
,
command_queue
&
cq
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
host
,
device
,
rows
,
cols
,
_buffer
,
cq
);
}
/// Update the allocation on the device asynchronously
inline
void
update_device
()
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
device
,
host
,
_buffer
,
true
);
}
/// Update the allocation on the device (true for asynchronous copy)
inline
void
update_device
(
const
bool
async
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
device
,
host
,
_buffer
,
async
);
}
/// Update the allocation on the device (using command queue)
inline
void
update_device
(
command_queue
&
cq
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
device
,
host
,
_buffer
,
cq
);
}
/// Update the first n elements on the device (true for asynchronous copy)
inline
void
update_device
(
const
int
n
,
const
bool
async
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
device
,
host
,
n
,
_buffer
,
async
);
}
/// Update the first n elements on the device (using command queue)
inline
void
update_device
(
const
int
n
,
command_queue
&
cq
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
device
,
host
,
n
,
_buffer
,
cq
);
}
/// Update slice on the device (true for asynchronous copy)
inline
void
update_device
(
const
int
rows
,
const
int
cols
,
const
bool
async
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
device
,
host
,
rows
,
cols
,
_buffer
,
async
);
}
/// Update slice on the device (using command queue)
inline
void
update_device
(
const
int
rows
,
const
int
cols
,
command_queue
&
cq
)
{
_ucl_s_obj_help
<
ucl_same_type
<
hosttype
,
devtype
>::
ans
>::
copy
(
device
,
host
,
rows
,
cols
,
_buffer
,
cq
);
}
private:
UCL_H_Mat
<
devtype
>
_buffer
;
};
#endif
Event Timeline
Log In to Comment