cuda_data.cu
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sat, Oct 19, 00:46

cuda_data.cu
View Options

	// Selects how the upload kernels map host data onto the device array.
	// NOTE(review): the enumerator names look like axis orderings for 1D/2D/3D
	// data - confirm against the kernels in cuda_data_kernel.cu.
	// yxz, yzx, zxy, zyx are not yet implemented since they were not needed yet.
	enum copy_mode {
		x,    // 0
		xx,   // 1
		xy,   // 2
		yx,   // 3
		xyz,  // 4
		xzy   // 5
	};

	#include "cuda_data_cu.h"
	#include "cuda_wrapper_cu.h"
	#include "cuda_data_kernel.cu"
	#include <cstdio>
	#include <vector>

	/*
	 * Uploads a host array of doubles and stores it on the device as floats,
	 * then downloads the result again and prints the summed squared difference
	 * against the host data as a debugging aid.
	 *
	 * host_data is transferred into `buffer` with CudaWrapper_UploadCudaData;
	 * CudaData_Upload_Kernel_DoubleFloat is then launched with `buffer` as its
	 * input and dev_data as its output. n[0..2] and mode are forwarded to the
	 * kernel unchanged.
	 *
	 *   host_data  source array, read as count*sizeof(double) bytes
	 *   dev_data   kernel output pointer (passed as float*)
	 *   n          three extents; count = n[0], multiplied by n[1] and n[2]
	 *              whenever those are non-zero
	 *   mode       copy mode handed to the kernel
	 *   buffer     staging area handed to the kernel as double*; must hold at
	 *              least count*sizeof(double) bytes
	 *
	 * Launch/synchronisation failures are reported on stderr and the debug
	 * comparison is skipped in that case.
	 */
	void CudaData_Upload_DoubleFloat(void* host_data,void* dev_data, unsigned* n,copy_mode mode,void* buffer)
	{
		// Total number of elements; a zero extent in n[1]/n[2] leaves the count unchanged.
		size_t count=n[0];
		if(n[1]>0) count*=n[1];
		if(n[2]>0) count*=n[2];

		// Nothing to transfer, and a grid with zero blocks is an invalid launch configuration.
		if(count==0) return;

		dim3 threads(1,1,1);
		dim3 grid(1,1,1);

		// Block size grows with the problem size.
		if(count<=128*30)
			threads.x=32;
		else if(count<=256*30)
			threads.x=64;
		else if(count<=512*30)
			threads.x=128;
		else
			threads.x=256;

		// One thread per element: ceil-divide into blocks, cap grid.x at 32000
		// and spill the remainder into grid.y.
		const size_t blocks=(count+threads.x-1)/threads.x;
		grid.x=(blocks>32000)?32000u:(unsigned)blocks;
		const size_t threads_per_row=(size_t)grid.x*threads.x;
		grid.y=(unsigned)((count+threads_per_row-1)/threads_per_row);

		// Heap storage for the read-back copy; a stack array of this size could overflow the stack.
		std::vector<float> debugdata(count);

		const size_t bytes=count*sizeof(double);
		printf("size: %zu (%u %u %u) (%u %u %u) %p\n",bytes,grid.x,grid.y,threads.x,n[0],n[1],n[2],buffer);

		CudaWrapper_UploadCudaData(host_data, buffer, bytes);
		CudaData_Upload_Kernel_DoubleFloat<<<grid,threads>>>((double*)buffer,(float*)dev_data,n[0],n[1],n[2],mode);

		// Launch-configuration errors show up in cudaGetLastError, execution errors at the sync.
		cudaError_t err=cudaGetLastError();
		if(err==cudaSuccess) err=cudaDeviceSynchronize();
		if(err!=cudaSuccess)
		{
			fprintf(stderr,"CudaData_Upload_DoubleFloat: CUDA error: %s\n",cudaGetErrorString(err));
			return;
		}

		// Debug check: read the converted floats back and compare with the host doubles.
		CudaWrapper_DownloadCudaData(&debugdata[0], dev_data, count*sizeof(float));
		const double* host=(const double*)host_data;
		double sum=0;
		printf("debugdata: ");
		for(size_t i=0;i<count;i++)
		{
			const double diff=debugdata[i]-host[i];
			sum+=diff*diff;
		}

		printf("%lf \n",sum);
	}

	/*
	 * Uploads a host array of doubles and stores it on the device as doubles.
	 *
	 * host_data is transferred into `buffer` with CudaWrapper_UploadCudaData;
	 * CudaData_Upload_Kernel_DoubleDouble is then launched with `buffer` as its
	 * input and dev_data as its output. n[0..2] and mode are forwarded to the
	 * kernel unchanged.
	 *
	 *   host_data  source array, read as count*sizeof(double) bytes
	 *   dev_data   kernel output pointer (passed as double*)
	 *   n          three extents; count = n[0], multiplied by n[1] and n[2]
	 *              whenever those are non-zero
	 *   mode       copy mode handed to the kernel
	 *   buffer     staging area handed to the kernel as double*; must hold at
	 *              least count*sizeof(double) bytes
	 *
	 * Launch/synchronisation failures are reported on stderr.
	 */
	void CudaData_Upload_DoubleDouble(void* host_data,void* dev_data, unsigned* n,copy_mode mode,void* buffer)
	{
		// Total number of elements; a zero extent in n[1]/n[2] leaves the count unchanged.
		size_t count=n[0];
		if(n[1]>0) count*=n[1];
		if(n[2]>0) count*=n[2];

		// Nothing to transfer, and a grid with zero blocks is an invalid launch configuration.
		if(count==0) return;

		dim3 threads(1,1,1);
		dim3 grid(1,1,1);

		// Block size grows with the problem size.
		if(count<=128*30)
			threads.x=32;
		else if(count<=256*30)
			threads.x=64;
		else if(count<=512*30)
			threads.x=128;
		else
			threads.x=256;

		// One thread per element: ceil-divide into blocks, cap grid.x at 32000
		// and spill the remainder into grid.y.
		const size_t blocks=(count+threads.x-1)/threads.x;
		grid.x=(blocks>32000)?32000u:(unsigned)blocks;
		const size_t threads_per_row=(size_t)grid.x*threads.x;
		grid.y=(unsigned)((count+threads_per_row-1)/threads_per_row);

		const size_t bytes=count*sizeof(double);

		CudaWrapper_UploadCudaData(host_data, buffer, bytes);
		CudaData_Upload_Kernel_DoubleDouble<<<grid,threads>>>((double*)buffer,(double*)dev_data,n[0],n[1],n[2],mode);

		// Launch-configuration errors show up in cudaGetLastError, execution errors at the sync.
		cudaError_t err=cudaGetLastError();
		if(err==cudaSuccess) err=cudaDeviceSynchronize();
		if(err!=cudaSuccess)
			fprintf(stderr,"CudaData_Upload_DoubleDouble: CUDA error: %s\n",cudaGetErrorString(err));
	}

	/*
	 * Uploads a host array of floats and stores it on the device as doubles.
	 *
	 * host_data is transferred into `buffer` with CudaWrapper_UploadCudaData;
	 * CudaData_Upload_Kernel_FloatDouble is then launched with `buffer` as its
	 * input and dev_data as its output. n[0..2] and mode are forwarded to the
	 * kernel unchanged.
	 *
	 *   host_data  source array, read as count*sizeof(float) bytes
	 *   dev_data   kernel output pointer (passed as double*)
	 *   n          three extents; count = n[0], multiplied by n[1] and n[2]
	 *              whenever those are non-zero
	 *   mode       copy mode handed to the kernel
	 *   buffer     staging area handed to the kernel as float*; must hold at
	 *              least count*sizeof(float) bytes
	 *
	 * Launch/synchronisation failures are reported on stderr.
	 */
	void CudaData_Upload_FloatDouble(void* host_data,void* dev_data, unsigned* n,copy_mode mode,void* buffer)
	{
		// Total number of elements; a zero extent in n[1]/n[2] leaves the count unchanged.
		size_t count=n[0];
		if(n[1]>0) count*=n[1];
		if(n[2]>0) count*=n[2];

		// Nothing to transfer, and a grid with zero blocks is an invalid launch configuration.
		if(count==0) return;

		dim3 threads(1,1,1);
		dim3 grid(1,1,1);

		// Block size grows with the problem size.
		if(count<=128*30)
			threads.x=32;
		else if(count<=256*30)
			threads.x=64;
		else if(count<=512*30)
			threads.x=128;
		else
			threads.x=256;

		// One thread per element: ceil-divide into blocks, cap grid.x at 32000
		// and spill the remainder into grid.y.
		const size_t blocks=(count+threads.x-1)/threads.x;
		grid.x=(blocks>32000)?32000u:(unsigned)blocks;
		const size_t threads_per_row=(size_t)grid.x*threads.x;
		grid.y=(unsigned)((count+threads_per_row-1)/threads_per_row);

		const size_t bytes=count*sizeof(float);

		CudaWrapper_UploadCudaData(host_data, buffer, bytes);
		CudaData_Upload_Kernel_FloatDouble<<<grid,threads>>>((float*)buffer,(double*)dev_data,n[0],n[1],n[2],mode);

		// Launch-configuration errors show up in cudaGetLastError, execution errors at the sync.
		cudaError_t err=cudaGetLastError();
		if(err==cudaSuccess) err=cudaDeviceSynchronize();
		if(err!=cudaSuccess)
			fprintf(stderr,"CudaData_Upload_FloatDouble: CUDA error: %s\n",cudaGetErrorString(err));
	}

	/*
	 * Uploads a host array of floats and stores it on the device as floats.
	 *
	 * host_data is transferred into `buffer` with CudaWrapper_UploadCudaData;
	 * CudaData_Upload_Kernel_FloatFloat is then launched with `buffer` as its
	 * input and dev_data as its output. n[0..2] and mode are forwarded to the
	 * kernel unchanged.
	 *
	 *   host_data  source array, read as count*sizeof(float) bytes
	 *   dev_data   kernel output pointer (passed as float*)
	 *   n          three extents; count = n[0], multiplied by n[1] and n[2]
	 *              whenever those are non-zero
	 *   mode       copy mode handed to the kernel
	 *   buffer     staging area handed to the kernel as float*; must hold at
	 *              least count*sizeof(float) bytes
	 *
	 * Launch/synchronisation failures are reported on stderr.
	 */
	void CudaData_Upload_FloatFloat(void* host_data,void* dev_data, unsigned* n,copy_mode mode,void* buffer)
	{
		// Total number of elements; a zero extent in n[1]/n[2] leaves the count unchanged.
		size_t count=n[0];
		if(n[1]>0) count*=n[1];
		if(n[2]>0) count*=n[2];

		// Nothing to transfer, and a grid with zero blocks is an invalid launch configuration.
		if(count==0) return;

		dim3 threads(1,1,1);
		dim3 grid(1,1,1);

		// Block size grows with the problem size.
		if(count<=128*30)
			threads.x=32;
		else if(count<=256*30)
			threads.x=64;
		else if(count<=512*30)
			threads.x=128;
		else
			threads.x=256;

		// One thread per element: ceil-divide into blocks, cap grid.x at 32000
		// and spill the remainder into grid.y.
		const size_t blocks=(count+threads.x-1)/threads.x;
		grid.x=(blocks>32000)?32000u:(unsigned)blocks;
		const size_t threads_per_row=(size_t)grid.x*threads.x;
		grid.y=(unsigned)((count+threads_per_row-1)/threads_per_row);

		const size_t bytes=count*sizeof(float);

		CudaWrapper_UploadCudaData(host_data, buffer, bytes);
		CudaData_Upload_Kernel_FloatFloat<<<grid,threads>>>((float*)buffer,(float*)dev_data,n[0],n[1],n[2],mode);

		// Launch-configuration errors show up in cudaGetLastError, execution errors at the sync.
		cudaError_t err=cudaGetLastError();
		if(err==cudaSuccess) err=cudaDeviceSynchronize();
		if(err!=cudaSuccess)
			fprintf(stderr,"CudaData_Upload_FloatFloat: CUDA error: %s\n",cudaGetErrorString(err));
	}

	/*
	 * Uploads a host array of ints and stores it on the device as ints.
	 *
	 * host_data is transferred into `buffer` with CudaWrapper_UploadCudaData;
	 * CudaData_Upload_Kernel_IntInt is then launched with `buffer` as its
	 * input and dev_data as its output. n[0..2] and mode are forwarded to the
	 * kernel unchanged.
	 *
	 *   host_data  source array, read as count*sizeof(int) bytes
	 *   dev_data   kernel output pointer (passed as int*)
	 *   n          three extents; count = n[0], multiplied by n[1] and n[2]
	 *              whenever those are non-zero
	 *   mode       copy mode handed to the kernel
	 *   buffer     staging area handed to the kernel as int*; must hold at
	 *              least count*sizeof(int) bytes
	 *
	 * Launch/synchronisation failures are reported on stderr.
	 */
	void CudaData_Upload_IntInt(void* host_data,void* dev_data, unsigned* n,copy_mode mode,void* buffer)
	{
		// Total number of elements; a zero extent in n[1]/n[2] leaves the count unchanged.
		size_t count=n[0];
		if(n[1]>0) count*=n[1];
		if(n[2]>0) count*=n[2];

		// Nothing to transfer, and a grid with zero blocks is an invalid launch configuration.
		if(count==0) return;

		dim3 threads(1,1,1);
		dim3 grid(1,1,1);

		// Block size grows with the problem size.
		if(count<=128*30)
			threads.x=32;
		else if(count<=256*30)
			threads.x=64;
		else if(count<=512*30)
			threads.x=128;
		else
			threads.x=256;

		// One thread per element: ceil-divide into blocks, cap grid.x at 32000
		// and spill the remainder into grid.y.
		const size_t blocks=(count+threads.x-1)/threads.x;
		grid.x=(blocks>32000)?32000u:(unsigned)blocks;
		const size_t threads_per_row=(size_t)grid.x*threads.x;
		grid.y=(unsigned)((count+threads_per_row-1)/threads_per_row);

		const size_t bytes=count*sizeof(int);

		CudaWrapper_UploadCudaData(host_data, buffer, bytes);
		CudaData_Upload_Kernel_IntInt<<<grid,threads>>>((int*)buffer,(int*)dev_data,n[0],n[1],n[2],mode);

		// Launch-configuration errors show up in cudaGetLastError, execution errors at the sync.
		cudaError_t err=cudaGetLastError();
		if(err==cudaSuccess) err=cudaDeviceSynchronize();
		if(err!=cudaSuccess)
			fprintf(stderr,"CudaData_Upload_IntInt: CUDA error: %s\n",cudaGetErrorString(err));
	}

	// Placeholder for the device-to-host counterpart of the CudaData_Upload_*
	// functions. The body is empty: calling it performs no copy and touches
	// none of its arguments.
	// NOTE(review): host_size/dev_size presumably are the per-element sizes in
	// bytes on each side - confirm before implementing.
	void CudaData_Download(void* host_data,void* dev_data,int host_size, int dev_size, unsigned* n,copy_mode mode,void* buffer)
	{
	}

cuda_data.cuNo OneTemporaryActions

File Metadata

cuda_data.cuView Options

Event Timeline

cuda_data.cu
No OneTemporary
Actions

cuda_data.cu
View Options