ucl_matrix.h
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sat, Jul 6, 08:31

ucl_matrix.h
View Options

	/***************************************************************************
	ucl_matrix.h
	-------------------
	W. Michael Brown

	Matrix Container on Host

	__________________________________________________________________________
	This file is part of the Geryon Unified Coprocessor Library (UCL)
	__________________________________________________________________________

	begin : Thu May 10 2012
	copyright : (C) 2012 by W. Michael Brown
	email : brownw@ornl.gov
	***************************************************************************/

	/* -----------------------------------------------------------------------
	This software is distributed under the Simplified BSD License.
	----------------------------------------------------------------------- */

	// Only allow this file to be included by CUDA and OpenCL specific headers
	#ifdef _UCL_MAT_ALLOW

	/// Matrix S-Object
	/** Pairs a host matrix (\c host) with a device matrix (\c device) and a
	  * private host-side staging buffer (\c _buffer) of the device element
	  * type. All allocation, resize and copy operations are forwarded to
	  * \c _ucl_s_obj_help, selected at compile time by whether \c hosttype and
	  * \c devtype are the same type. **/
	template <class hosttype, class devtype>
	class UCL_Matrix {
	 public:
	  // Traits for copying data
	  // MEM_TYPE is 0 for device, 1 for host, and 2 for image
	  enum traits {
	    DATA_TYPE = _UCL_DATA_ID<hosttype>::id,
	    MEM_TYPE = 1,
	    PADDED = 0,
	    ROW_MAJOR = 1,
	    VECTOR = 0
	  };
	  typedef hosttype data_type;

	  /// Host Allocation
	  UCL_H_Mat<hosttype> host;

	  /// Device Allocation
	  UCL_D_Mat<devtype> device;

	  UCL_Matrix() { }
	  ~UCL_Matrix() { }

	  /// Construct with specified number of rows and columns
	  /** \sa alloc() **/
	  UCL_Matrix(const size_t rows, const size_t cols, UCL_Device &acc,
	             const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
	             const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        alloc(host,device,_buffer,rows,cols,acc,kind1,kind2); }

	  /// Set up host matrix with specified # of rows/cols and reserve memory
	  /** The kind1 parameter controls memory access from the host
	    * - UCL_READ_WRITE - Specify that you will read and write from host
	    * - UCL_WRITE_ONLY - Specify that you will only write from host
	    * - UCL_READ_ONLY  - Specify that you will only read from host
	    * - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
	    * The kind2 parameter controls memory optimizations from the device:
	    * - UCL_READ_WRITE - Specify that you will read and write in kernels
	    * - UCL_WRITE_ONLY - Specify that you will only write in kernels
	    * - UCL_READ_ONLY  - Specify that you will only read in kernels
	    * \note When passing a command queue instead of a device, the device
	    *       allocation is always performed. Even if the device shares memory
	    *       with the host.
	    * \param cq Default command queue for operations copied from another mat
	    * \return UCL_SUCCESS if the memory allocation is successful **/
	  template <class mat_type>
	  inline int alloc(const size_t rows, const size_t cols, mat_type &cq,
	                   const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
	                   const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
	    { return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        alloc(host,device,_buffer,rows,cols,cq,kind1,kind2); }

	  /// Set up host matrix with specified # of rows/cols and reserve memory
	  /** The kind1 parameter controls memory access from the host
	    * - UCL_READ_WRITE - Specify that you will read and write from host
	    * - UCL_WRITE_ONLY - Specify that you will only write from host
	    * - UCL_READ_ONLY  - Specify that you will only read from host
	    * - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
	    * The kind2 parameter controls memory optimizations from the device:
	    * - UCL_READ_WRITE - Specify that you will read and write in kernels
	    * - UCL_WRITE_ONLY - Specify that you will only write in kernels
	    * - UCL_READ_ONLY  - Specify that you will only read in kernels
	    * \param acc Device used to get the default command queue for operations
	    * \return UCL_SUCCESS if the memory allocation is successful **/
	  inline int alloc(const size_t rows, const size_t cols, UCL_Device &acc,
	                   const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
	                   const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
	    { return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        alloc(host,device,_buffer,rows,cols,acc,kind1,kind2); }

	  /// Free memory and set size to 0
	  /** Clears the host and device allocations; the staging buffer is not
	    * cleared here. **/
	  inline void clear()
	    { host.clear(); device.clear(); }

	  /// Resize the allocation to contain rows x cols elements
	  /** The host allocation must not be a view (checked with assert).
	    * \return the host resize error code if it fails, otherwise the result
	    *         of the device-side resize **/
	  inline int resize(const int rows, const int cols) {
	    assert(host.kind()!=UCL_VIEW);
	    int err=host.resize(rows,cols);
	    if (err!=UCL_SUCCESS)
	      return err;
	    return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	      dev_resize(device,host,_buffer,rows,cols);
	  }

	  /// Resize (only if bigger) the allocation to contain rows x cols elements
	  /** \return UCL_SUCCESS when no resize is needed, otherwise the result of
	    *         resize() **/
	  inline int resize_ib(const int new_rows, const int new_cols)
	    { if (new_rows>rows() || new_cols>cols()) return resize(new_rows,new_cols);
	      else return UCL_SUCCESS; }

	  /// Set each element to zero (asynchronously on device)
	  inline void zero() { zero(cq()); }
	  /// Set first n elements to zero (asynchronously on device)
	  inline void zero(const int n) { zero(n,cq()); }
	  /// Set each element to zero (asynchronously on device)
	  /** When the device allocation is a view, the staging buffer (if it has
	    * been allocated) is zeroed instead of the device allocation. **/
	  inline void zero(command_queue &cq) {
	    host.zero();
	    if (device.kind()!=UCL_VIEW) device.zero(cq);
	    else if (_buffer.numel()>0) _buffer.zero();
	  }
	  /// Set first n elements to zero (asynchronously on device)
	  inline void zero(const int n, command_queue &cq) {
	    host.zero(n);
	    if (device.kind()!=UCL_VIEW) device.zero(n,cq);
	    // NOTE(review): this zeroes the whole staging buffer rather than only
	    // the first n elements - confirm whether _buffer.zero(n) was intended.
	    else if (_buffer.numel()>0) _buffer.zero();
	  }

	  /// Get the number of elements
	  inline size_t numel() const { return host.numel(); }
	  /// Get the number of rows
	  inline size_t rows() const { return host.rows(); }
	  /// Get the number of columns
	  inline size_t cols() const { return host.cols(); }
	  /// Get the host memory usage (bytes) of the s-object (including any buffers)
	  inline size_t host_mem_usage()
	    { return host.row_bytes()*host.rows()+_buffer.row_bytes()*_buffer.rows(); }
	  /// Get the device memory usage (bytes) of the s-object
	  inline size_t device_mem_usage()
	    { return device.row_bytes()*device.rows(); }

	  /// Get element at index i (host allocation)
	  inline hosttype & operator[](const int i) { return host[i]; }
	  /// Get element at index i (host allocation)
	  inline const hosttype & operator[](const int i) const { return host[i]; }
	  /// 2D access to the element at (row, col) of the host allocation
	  inline hosttype & operator()(const int row, const int col)
	    { return host(row,col); }
	  /// 2D access to the element at (row, col) of the host allocation
	  inline const hosttype & operator()(const int row, const int col) const
	    { return host(row,col); }

	  /// Returns pointer to memory pointer for allocation on host
	  inline hosttype ** host_ptr() { return host.host_ptr(); }

	  /// Return the default command queue/stream associated with this data
	  inline command_queue & cq() { return host.cq(); }
	  /// Change the default command queue associated with this data
	  /** Applied to both the host and the device allocation. **/
	  inline void cq(command_queue &cq_in) { host.cq(cq_in); device.cq(cq_in); }
	  /// Block until command_queue associated with matrix is complete
	  inline void sync() { host.sync(); }

	  /// Get the size of a row on the host (including any padding) in elements
	  inline size_t row_size() const { return host.row_size(); }
	  /// Get the size of a row on the host (including any padding) in bytes
	  inline size_t row_bytes() const { return host.row_bytes(); }
	  /// Get the size on the host in bytes of 1 element
	  inline int element_size() const { return sizeof(hosttype); }


	  /// Update the allocation on the host asynchronously
	  inline void update_host()
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(host,device,_buffer,true); }
	  /// Update the allocation on the host (true for asynchronous copy)
	  inline void update_host(const bool async)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(host,device,_buffer,async); }
	  /// Update the allocation on the host (using command queue)
	  inline void update_host(command_queue &cq)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(host,device,_buffer,cq); }
	  /// Update the first n elements on the host (true for asynchronous copy)
	  inline void update_host(const int n, const bool async)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(host,device,n,_buffer,async); }
	  /// Update the first n elements on the host (using command queue)
	  inline void update_host(const int n, command_queue &cq)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(host,device,n,_buffer,cq); }
	  /// Update slice on the host (true for asynchronous copy)
	  inline void update_host(const int rows, const int cols, const bool async)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(host,device,rows,cols,_buffer,async); }
	  /// Update slice on the host (using command queue)
	  inline void update_host(const int rows, const int cols, command_queue &cq)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(host,device,rows,cols,_buffer,cq); }


	  /// Update the allocation on the device asynchronously
	  inline void update_device()
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(device,host,_buffer,true); }
	  /// Update the allocation on the device (true for asynchronous copy)
	  inline void update_device(const bool async)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(device,host,_buffer,async); }
	  /// Update the allocation on the device (using command queue)
	  inline void update_device(command_queue &cq)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(device,host,_buffer,cq); }
	  /// Update the first n elements on the device (true for asynchronous copy)
	  inline void update_device(const int n, const bool async)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(device,host,n,_buffer,async); }
	  /// Update the first n elements on the device (using command queue)
	  inline void update_device(const int n, command_queue &cq)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(device,host,n,_buffer,cq); }
	  /// Update slice on the device (true for asynchronous copy)
	  inline void update_device(const int rows, const int cols, const bool async)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(device,host,rows,cols,_buffer,async); }
	  /// Update slice on the device (using command queue)
	  inline void update_device(const int rows, const int cols, command_queue &cq)
	    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
	        copy(device,host,rows,cols,_buffer,cq); }


	 private:
	  /// Host-side staging buffer holding elements of the device type
	  UCL_H_Mat<devtype> _buffer;
	};

	#endif

ucl_matrix.hNo OneTemporaryActions

File Metadata

ucl_matrix.hView Options

Event Timeline

ucl_matrix.h
No OneTemporary
Actions

ucl_matrix.h
View Options