Page MenuHomec4science

neigh_full_cuda.cpp
No OneTemporary

File Metadata

Created
Wed, Nov 6, 23:17

neigh_full_cuda.cpp

/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include "neighbor_cuda.h"
#include "neigh_list.h"
#include "atom.h"
#include "domain.h"
#include "group.h"
#include "error.h"
#include "cuda_neigh_list.h"
#include "cuda.h"
#include "neighbor_cu.h"
#include <cmath>
using namespace LAMMPS_NS;
/* ----------------------------------------------------------------------
   binned build of a full neighbor list on the GPU
   every neighbor pair appears in list of both atoms i and j

   list = neighbor list to build; it is registered with the CUDA layer
          on first use

   outline:
   1) pick a bin grid for the sub-domain and (re)allocate the device
      array binned_id whenever the grid or the per-bin capacity changed
   2) call Cuda_BinAtoms(); steps 1-2 repeat while it returns non-zero
   3) re-upload the exclusion tables if nex_type/nex_group/nex_mol changed
   4) call Cuda_NeighborBuildFullBin(); on a negative return the neighbor
      storage is enlarged and the build is repeated
   5) download numneigh and ilist to the host

   returns without doing anything if this proc owns no atoms
------------------------------------------------------------------------- */
void NeighborCuda::full_bin_cuda(NeighList *list)
{
  MYDBG(printf(" # CUDA::NeighFullBinCuda ... start\n");)

  if(includegroup) error->warning(FLERR,"Warning using inlcudegroup neighborbuild. This is not yet supported by CUDA neighborbuild styles.\n");

  const int nlocal = atom->nlocal;
  const int nall = nlocal + atom->nghost;

  if(nlocal==0) return;

  // Register the list BEFORE caching pointers into it: the cached pointers
  // must refer to the CudaNeighList that exists after registration,
  // otherwise an unregistered list would be dereferenced through NULL.
  if(!list->cuda_list) cuda->registerNeighborList(list);

  CudaNeighList* clist=list->cuda_list;
  cuda_shared_neighlist* slist=&clist->sneighlist;

  clist->build_cuda=true;

  // While bin_extraspace is below 0.09, derive maxcut as the square root of
  // the largest squared neighbor cutoff over all type pairs.
  // NOTE(review): presumably bin_extraspace<0.09 marks binning parameters
  // that have not been tuned yet -- confirm against Cuda_BinAtoms().
  if(slist->bin_extraspace<0.09)
  {
    for(int i=1;i<=atom->ntypes;i++)
      for(int j=1;j<=atom->ntypes;j++)
      {
        if(slist->maxcut<cutneighsq[i][j]) slist->maxcut=cutneighsq[i][j];
      }

    slist->maxcut=sqrt(slist->maxcut);
  }

  int bin_dim_tmp[3];
  int bin_nmax_tmp;

  // NOTE: no upper limit on the per-bin capacity is enforced here; a very
  // long pair cutoff can therefore lead to very large bins.
  do
  {
    // Choose a grid whose bins are at least maxcut wide and can hold at
    // least 32 atoms each; maxcut is enlarged by 20% per pass until the
    // capacity requirement is met.
    do
    {
      for(int d=0;d<3;d++)
      {
        bin_dim_tmp[d]=static_cast <int> ((domain->subhi[d]-domain->sublo[d])/slist->maxcut);
        if(bin_dim_tmp[d]==0) bin_dim_tmp[d]=1;
      }

      // With a 1x1x1 grid, growing maxcut cannot raise the capacity anymore.
      const bool single_bin=(bin_dim_tmp[0]==1)&&(bin_dim_tmp[1]==1)&&(bin_dim_tmp[2]==1);

      bin_nmax_tmp=static_cast <int> ((1.0+slist->bin_extraspace)*nlocal/(bin_dim_tmp[0]*bin_dim_tmp[1]*bin_dim_tmp[2]));

      // four additional bins per dimension
      // (presumably two ghost layers on each side -- TODO confirm)
      for(int d=0;d<3;d++) bin_dim_tmp[d]+=4;

      if(bin_nmax_tmp<32)
      {
        // Few local atoms in a single bin: clamp the capacity instead of
        // enlarging maxcut forever (this loop used to never terminate here).
        if(single_bin) bin_nmax_tmp=32;
        else slist->maxcut*=1.2;
      }
    } while(bin_nmax_tmp<32);

    const bool layout_changed=
      (slist->bin_dim[0]!=bin_dim_tmp[0])||
      (slist->bin_dim[1]!=bin_dim_tmp[1])||
      (slist->bin_dim[2]!=bin_dim_tmp[2])||
      (slist->bin_nmax!=bin_nmax_tmp);

    if(layout_changed)
    {
      // free with the OLD extents, then allocate with the new ones
      if(slist->binned_id!=NULL)
        CudaWrapper_FreeCudaData(slist->binned_id,slist->bin_dim[0]*slist->bin_dim[1]*slist->bin_dim[2]*slist->bin_nmax*sizeof(int));

      slist->bin_dim[0] = bin_dim_tmp[0];
      slist->bin_dim[1] = bin_dim_tmp[1];
      slist->bin_dim[2] = bin_dim_tmp[2];
      slist->bin_nmax = bin_nmax_tmp;
      slist->binned_id=(int*) CudaWrapper_AllocCudaData(slist->bin_dim[0]*slist->bin_dim[1]*slist->bin_dim[2]*slist->bin_nmax*sizeof(int));
    }
  } while(Cuda_BinAtoms(&cuda->shared_data, slist));

  int maxneighbors=slist->maxneighbors;

  // Refresh the device copies of the exclusion tables only when one of the
  // exclusion counts differs from what the shared list last recorded.
  if((nex_type!=slist->nex_type)||
     (nex_group!=slist->nex_group)||
     (nex_mol!=slist->nex_mol))
  {
    slist->nex_type=nex_type;
    slist->nex_group=nex_group;
    slist->nex_mol=nex_mol;

    if(nex_type)
    {
      delete clist->cu_ex_type;
      clist->cu_ex_type=new cCudaData<int , int , x> (&ex_type[0][0] , & slist->ex_type , (atom->ntypes+1)*(atom->ntypes+1) );
      clist->cu_ex_type->upload();
    }

    if(nex_group)
    {
      delete clist->cu_ex1_bit;
      clist->cu_ex1_bit=new cCudaData<int , int , x> (ex1_bit , & slist->ex1_bit , nex_group );
      clist->cu_ex1_bit->upload();

      delete clist->cu_ex2_bit;
      clist->cu_ex2_bit=new cCudaData<int , int , x> (ex2_bit , & slist->ex2_bit , nex_group );
      clist->cu_ex2_bit->upload();
    }

    if(nex_mol)
    {
      delete clist->cu_ex_mol_bit;
      clist->cu_ex_mol_bit=new cCudaData<int , int , x> (ex_mol_bit , & slist->ex_mol_bit , nex_mol );
      clist->cu_ex_mol_bit->upload();
    }
  }

  // Build the list; repeat with larger neighbor storage while the build
  // reports a negative value.
  int overflow;

  do
  {
    clist->grow_device();
    slist->cutneighsq=cutneighsq;
    slist->maxneighbors=maxneighbors;
    slist->inum = list->inum = nlocal;

    if(cuda->shared_data.overlap_comm)
    {
      // reset the border-atom counter on the device before the build
      clist->inum_border=0;
      clist->cu_inum_border->upload();
    }

    cuda->shared_data.atom.nall=nall;

    overflow= Cuda_NeighborBuildFullBin(&cuda->shared_data, slist);

    if(overflow<0)
    {
      // Grow by 32; if -overflow still exceeds that, jump to
      // ((-overflow+37)/32)*32 instead.
      // NOTE(review): -overflow is presumably the neighbor count that did
      // not fit -- confirm against Cuda_NeighborBuildFullBin().
      maxneighbors+=32;

      if(-overflow>maxneighbors) maxneighbors=((-overflow+37)/32)*32;

      // replace the host buffer and its device mirror with larger ones
      delete clist->cu_neighbors;
      delete [] clist->neighbors;
      clist->neighbors= new int[slist->maxlocal*maxneighbors];
      slist->maxneighbors=maxneighbors;
      clist->cu_neighbors= new cCudaData<int, int, x> (clist->neighbors , & slist->neighbors, slist->maxlocal*maxneighbors );

      if(cuda->shared_data.overlap_comm)
      {
        // re-create the remaining device buffers for the new capacity
        clist->dev_free();
        clist->dev_alloc();
      }
    }

    if(cuda->shared_data.overlap_comm)
    {
      clist->cu_inum_border->download();
      slist->inum_border2=clist->inum_border;
    }
  } while(overflow<0);

  // bring the per-atom neighbor counts and the atom index list to the host
  clist->cu_numneigh->download();
  clist->cu_ilist->download();
  cuda->shared_data.atom.update_neigh=2;

  MYDBG(printf(" # CUDA::NeighFullBinCuda ... end\n");)
}
/* ----------------------------------------------------------------------
   N^2 build of a full neighbor list on the GPU -- retired
   only prints a notice that this build style is no longer implemented;
   the passed list is not accessed
------------------------------------------------------------------------- */
void NeighborCuda::full_nsq_cuda(NeighList *list)
{
  // The former implementation has been removed. It registered the list,
  // sized the neighbor pages, called Cuda_NeighborReBuildFirstneigh() and
  // Cuda_NeighborBuildFullNsq() in a loop that raised maxneighbors by 32
  // after every overflow, and finally downloaded the list when the run was
  // not "cudable".
  printf("Full_Nsq cuda neighbor list build is not implemented anymore.\n");
}

Event Timeline