Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F77703254
binning.cu
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, Aug 15, 23:27
Size
9 KB
Mime Type
text/x-c
Expires
Sat, Aug 17, 23:27 (2 d)
Engine
blob
Format
Raw Data
Handle
19787882
Attached To
rLAMMPS lammps
binning.cu
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifdef CUDA_USE_BINNING
#include <stdio.h>
#define MY_PREFIX binning
#include "cuda_shared.h"
#include "cuda_common.h"
#include "crm_cuda_utils.cu"
#include "binning_cu.h"
#include "binning_kernel.cu"
void Cuda_PreBinning(cuda_shared_data* sdata)
{
// initialize only on first call
short init = 0;
if(! init)
{
init = 1;
int cuda_dummy_type = sdata->atom.ntypes + 1;
X_FLOAT outside[3] =
{
(sdata->domain.subhi[0] - sdata->domain.sublo[0])/1000.0,
(sdata->domain.subhi[1] - sdata->domain.sublo[1])/1000.0,
(sdata->domain.subhi[2] - sdata->domain.sublo[2])/1000.0
};
cudaMemcpyToSymbol("binned_size_all" , & sdata->atom.binned_type.dim[0] , sizeof(unsigned) );
cudaMemcpyToSymbol("cuda_dummy_type" , & cuda_dummy_type , sizeof(int) );
cudaMemcpyToSymbol("outside" , & outside , sizeof(X_FLOAT)*3);
cudaMemcpyToSymbol(MY_CONST(binned_type), & sdata->atom.binned_type.dev_data, sizeof(int*) );
cudaMemcpyToSymbol(MY_CONST(binned_x) , & sdata->atom.binned_x .dev_data, sizeof(X_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(binned_tag) , & sdata->atom.binned_tag .dev_data, sizeof(int*) );
cudaMemcpyToSymbol(MY_CONST(subhi) , sdata->domain.subhi , sizeof(X_FLOAT)*3);
// bin_nmax == blockDim.x
// printf("# CUDA: MY_CONST(binned_type) = %s\n", MY_CONST(binned_type));
// int* p = pre_binning_binned_type; // pre_binning_binned_type is defined here!!
}
dim3 grid(sdata->domain.bin_dim[0], sdata->domain.bin_dim[1] * sdata->domain.bin_dim[2], 1);
dim3 threads(sdata->domain.bin_nmax, 1, 1);
MYDBG(printf("# CUDA: Cuda_PreBinning: pre binning grid = (%u, %u, %u)\n", grid.x, grid.y, grid.z);)
MYDBG(printf("# CUDA: Cuda_PreBinning: pre binning threads = (%u, %u, %u)\n", threads.x, threads.y, threads.z); )
PreBinning_Kernel<<<grid, threads>>> ();
cudaThreadSynchronize();
MYDBG(printf("ERROR-CUDA pre_binning: %s\n",cudaGetErrorString(cudaGetLastError())));
CUT_CHECK_ERROR("Cuda_PreBinning: binning Kernel execution failed");
}
void Cuda_Binning(cuda_shared_data* sdata)
{
MYDBG( // check assumption in debug mode
if(sdata->atom.x.dim[1] != 3)
{
printf("# CUDA: Cuda_Binning: binning error: atom array dimensions not Nx3\n");
return;
}
)
// initialize only on first call
short init = 0;
if(! init)
{
init = 0;
X_FLOAT const_rez_bin_size[3] =
{
(1.0 * sdata->domain.bin_dim[0]-4.0) / (sdata->domain.subhi[0] - sdata->domain.sublo[0]),
(1.0 * sdata->domain.bin_dim[1]-4.0) / (sdata->domain.subhi[1] - sdata->domain.sublo[1]),
(1.0 * sdata->domain.bin_dim[2]-4.0) / (sdata->domain.subhi[2] - sdata->domain.sublo[2])
};
cudaMemcpyToSymbol("bin_error_count" , & sdata->atom.bin_error_count.dev_data, sizeof(X_FLOAT)*1);
cudaMemcpyToSymbol("rez_bin_size" , & const_rez_bin_size , sizeof(X_FLOAT)*3);
cudaMemcpyToSymbol(MY_CONST(bin_count_all) , & sdata->atom.bin_count_all .dev_data, sizeof(unsigned*));
cudaMemcpyToSymbol(MY_CONST(bin_count_local), & sdata->atom.bin_count_local.dev_data, sizeof(unsigned*));
cudaMemcpyToSymbol(MY_CONST(bin_dim) , sdata->domain.bin_dim , sizeof(int3) );
cudaMemcpyToSymbol(MY_CONST(bin_nmax) , & sdata->domain.bin_nmax , sizeof(unsigned) );
cudaMemcpyToSymbol(MY_CONST(binned_f) , & sdata->atom.binned_f .dev_data, sizeof(F_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(binned_q) , & sdata->atom.binned_q .dev_data, sizeof(F_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(binned_rmass) , & sdata->atom.binned_rmass .dev_data, sizeof(V_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(binned_tag) , & sdata->atom.binned_tag .dev_data, sizeof(int*) );
cudaMemcpyToSymbol(MY_CONST(binned_type) , & sdata->atom.binned_type .dev_data, sizeof(int*) );
cudaMemcpyToSymbol(MY_CONST(binned_v) , & sdata->atom.binned_v .dev_data, sizeof(V_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(binpos) , & sdata->atom.binpos .dev_data, sizeof(int*));
cudaMemcpyToSymbol(MY_CONST(f) , & sdata->atom.f .dev_data, sizeof(F_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(natoms) , & sdata->atom.nall , sizeof(unsigned) );
cudaMemcpyToSymbol(MY_CONST(nghost) , & sdata->atom.nghost , sizeof(unsigned) );
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(unsigned) );
cudaMemcpyToSymbol(MY_CONST(nmax) , & sdata->atom.nmax , sizeof(unsigned) );
cudaMemcpyToSymbol(MY_CONST(q) , & sdata->atom.q .dev_data, sizeof(F_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(rmass) , & sdata->atom.rmass .dev_data, sizeof(V_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(sublo) , sdata->domain.sublo , sizeof(X_FLOAT)*3);
cudaMemcpyToSymbol(MY_CONST(tag) , & sdata->atom.tag .dev_data, sizeof(int*) );
cudaMemcpyToSymbol(MY_CONST(type) , & sdata->atom.type .dev_data, sizeof(int*) );
cudaMemcpyToSymbol(MY_CONST(v) , & sdata->atom.v .dev_data, sizeof(V_FLOAT*) );
}
dim3 grid((unsigned)(1 + sdata->atom.nlocal/64.0), 1, 1);
MYDBG( printf("# CUDA: Cuda_Binning: grid dim.x = %u (nlocal: %i)\n", grid.x,sdata->atom.nlocal); )
dim3 threads(64, 1, 1);
cudaMemset((int*) (sdata->atom.bin_count_all.dev_data),0,sizeof(int)*(sdata->domain.bin_dim[0])*(sdata->domain.bin_dim[1])*(sdata->domain.bin_dim[2]));
cudaMemset((int*) (sdata->atom.bin_count_local.dev_data),0,sizeof(int)*(sdata->domain.bin_dim[0])*(sdata->domain.bin_dim[1])*(sdata->domain.bin_dim[2]));
cudaMemset(sdata->atom.bin_error_count.dev_data,0,sizeof(int)*1);
int binning_error_l[1];
Binning_Kernel<<<grid, threads>>> (
(X_FLOAT*) (sdata->atom. x.dev_data),
(X_FLOAT*) (sdata->atom.binned_x.dev_data),
sdata->atom.q_flag,
0,
sdata->atom.rmass_flag
);
cudaThreadSynchronize();
cudaMemcpy((void*) binning_error_l,(void*) sdata->atom.bin_error_count.dev_data,1*sizeof(int),cudaMemcpyDeviceToHost);
if(binning_error_l[0]!=0)
{
printf("CUDA-ERROR: binning local: could not bin %i atoms\n",binning_error_l[0]);
}
CUT_CHECK_ERROR("Cuda_Binning: binning Kernel execution failed");
grid.x=(unsigned)(1 + (sdata->atom.nall-sdata->atom.nlocal)/32.0);
MYDBG( printf("# CUDA: Cuda_Binning Ghost: grid dim.x = %u\n", grid.x); )
Binning_Kernel<<<grid, threads>>> (
(X_FLOAT*) (sdata->atom. x.dev_data),
(X_FLOAT*) (sdata->atom.binned_x.dev_data),
sdata->atom.q_flag,
sdata->atom.nlocal,
sdata->atom.rmass_flag
);
cudaThreadSynchronize();
cudaMemcpy((void*) binning_error_l,(void*) sdata->atom.bin_error_count.dev_data,1*sizeof(int),cudaMemcpyDeviceToHost);
if(binning_error_l[0]!=0) printf("CUDA-ERROR: binning ghost: could not bin %i atoms\n",binning_error_l[0]);
}
void Cuda_ReverseBinning(cuda_shared_data* sdata)
{
// initialize only on first call
short init = 0;
if(! init)
{
init = 0;
cudaMemcpyToSymbol(MY_CONST(bin_count_all) , & sdata->atom.bin_count_all .dev_data, sizeof(unsigned*));
cudaMemcpyToSymbol(MY_CONST(bin_count_local), & sdata->atom.bin_count_local.dev_data, sizeof(unsigned*));
cudaMemcpyToSymbol(MY_CONST(bin_dim) , sdata->domain.bin_dim , sizeof(int3) );
cudaMemcpyToSymbol(MY_CONST(binned_f) , & sdata->atom.binned_f .dev_data, sizeof(F_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(binned_q) , & sdata->atom.binned_q .dev_data, sizeof(F_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(binned_tag) , & sdata->atom.binned_tag .dev_data, sizeof(int*) );
cudaMemcpyToSymbol(MY_CONST(binned_type) , & sdata->atom.binned_type .dev_data, sizeof(int*) );
cudaMemcpyToSymbol(MY_CONST(binned_v) , & sdata->atom.binned_v .dev_data, sizeof(V_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(f) , & sdata->atom.f .dev_data, sizeof(F_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(natoms) , & sdata->atom.nall , sizeof(unsigned) );
cudaMemcpyToSymbol(MY_CONST(nmax) , & sdata->atom.nmax , sizeof(unsigned) );
cudaMemcpyToSymbol(MY_CONST(q) , & sdata->atom.q .dev_data, sizeof(F_FLOAT*) );
cudaMemcpyToSymbol(MY_CONST(tag) , & sdata->atom.tag .dev_data, sizeof(int*) );
cudaMemcpyToSymbol(MY_CONST(type) , & sdata->atom.type .dev_data, sizeof(int*) );
cudaMemcpyToSymbol(MY_CONST(v) , & sdata->atom.v .dev_data, sizeof(V_FLOAT*) );
}
dim3 grid((unsigned)(1 + sdata->atom.nlocal/32.0), 1, 1);
MYDBG( printf("# CUDA: Cuda_ReverseBinning: grid dim.x = %u (nlocal: %i)\n", grid.x,sdata->atom.nlocal); )
dim3 threads(32, 1, 1);
ReverseBinning_Kernel<<<grid, threads>>> (
(X_FLOAT*) (sdata->atom. x.dev_data),
(X_FLOAT*) (sdata->atom.binned_x.dev_data),
sdata->atom.q_flag
);
cudaThreadSynchronize();
CUT_CHECK_ERROR("Cuda_Binning: reverse binning Kernel execution failed");
}
#endif
Event Timeline
Log In to Comment