Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F107068332
neigh_full_cuda.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Apr 4, 05:15
Size
11 KB
Mime Type
text/x-c
Expires
Sun, Apr 6, 05:15 (2 d)
Engine
blob
Format
Raw Data
Handle
25344685
Attached To
rLAMMPS lammps
neigh_full_cuda.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#include "neighbor_cuda.h"
#include "neigh_list.h"
#include "atom.h"
#include "domain.h"
#include "group.h"
#include "error.h"
#include "cuda_neigh_list.h"
#include "cuda.h"
#include "neighbor_cu.h"
#include <cmath>
using
namespace
LAMMPS_NS
;
/* ----------------------------------------------------------------------
   binned build of a full neighbor list on the GPU
   every neighbor pair appears in list of both atoms i and j

   steps performed by this host-side driver:
     1. make sure a CudaNeighList is attached to the NeighList
     2. choose a bin grid and (re)allocate the device bin buffer until
        Cuda_BinAtoms() reports success (returns 0)
     3. (re)upload the exclusion tables when their counts have changed
     4. call Cuda_NeighborBuildFullBin(), enlarging the per-atom neighbor
        capacity and retrying while it returns a negative value
     5. download numneigh and ilist to the host

   list = neighbor list to build; returns immediately if there are no
          local atoms
------------------------------------------------------------------------- */
void NeighborCuda::full_bin_cuda(NeighList* list)
{
  MYDBG(printf(" # CUDA::NeighFullBinCuda ... start\n");)

  if(includegroup)
    error->warning(FLERR, "Warning using inlcudegroup neighborbuild. This is not yet supported by CUDA neighborbuild styles.\n");

  int nlocal = atom->nlocal;
  int nall = nlocal + atom->nghost;

  if(nlocal == 0) return;

  // Register the list BEFORE caching any pointer derived from
  // list->cuda_list. The previous code cached the pointers first and then
  // dereferenced the stale NULL pointer when the list had not been
  // registered yet.
  if(not list->cuda_list) cuda->registerNeighborList(list);

  CudaNeighList* clist = list->cuda_list;
  cuda_shared_neighlist* slist = &clist->sneighlist;

  clist->build_cuda = true;

  // Derive the bin edge length from the largest squared pair cutoff.
  // NOTE(review): bin_extraspace < 0.09 presumably marks "bins not set up
  // yet" -- confirm against the code that initialises/raises bin_extraspace.
  if(slist->bin_extraspace < 0.09) {
    for(int i = 1; i <= atom->ntypes; i++)
      for(int j = 1; j <= atom->ntypes; j++) {
        if(slist->maxcut < cutneighsq[i][j]) slist->maxcut = cutneighsq[i][j];
      }

    slist->maxcut = sqrt(slist->maxcut);
  }

  int bin_dim_tmp[3];
  int bin_nmax_tmp;

  do {
    // Enlarge the bins until each one has room for at least 32 atoms.
    do {
      for(int d = 0; d < 3; d++) {
        bin_dim_tmp[d] = static_cast <int>((domain->subhi[d] - domain->sublo[d]) / slist->maxcut);

        if(bin_dim_tmp[d] == 0) bin_dim_tmp[d] += 1;
      }

      // true when the sub-domain is already covered by one single bin
      const bool single_bin = (bin_dim_tmp[0] == 1) && (bin_dim_tmp[1] == 1) && (bin_dim_tmp[2] == 1);

      bin_nmax_tmp = static_cast <int>((1.0 + slist->bin_extraspace) * nlocal / (bin_dim_tmp[0] * bin_dim_tmp[1] * bin_dim_tmp[2]));

      // four extra bins per dimension
      // NOTE(review): presumably padding for ghost atoms -- confirm in Cuda_BinAtoms
      bin_dim_tmp[0] += 4;
      bin_dim_tmp[1] += 4;
      bin_dim_tmp[2] += 4;

      if(bin_nmax_tmp < 32) {
        if(single_bin) {
          // The grid cannot get any coarser, so growing maxcut would never
          // raise the per-bin count and this loop would spin forever for
          // small nlocal. Use the minimum capacity instead.
          bin_nmax_tmp = 32;
        } else {
          slist->maxcut *= 1.2;
        }
      }
    } while(bin_nmax_tmp < 32);

    const bool layout_changed = (slist->bin_dim[0] != bin_dim_tmp[0]) ||
                                (slist->bin_dim[1] != bin_dim_tmp[1]) ||
                                (slist->bin_dim[2] != bin_dim_tmp[2]) ||
                                (slist->bin_nmax != bin_nmax_tmp);

    if(layout_changed) {
      // release the buffer using its OLD size before the layout is overwritten
      if(slist->binned_id != NULL)
        CudaWrapper_FreeCudaData(slist->binned_id, slist->bin_dim[0]*slist->bin_dim[1]*slist->bin_dim[2]*slist->bin_nmax * sizeof(int));

      slist->bin_dim[0] = bin_dim_tmp[0];
      slist->bin_dim[1] = bin_dim_tmp[1];
      slist->bin_dim[2] = bin_dim_tmp[2];
      slist->bin_nmax = bin_nmax_tmp;
      slist->binned_id = (int*) CudaWrapper_AllocCudaData(slist->bin_dim[0]*slist->bin_dim[1]*slist->bin_dim[2]*slist->bin_nmax * sizeof(int));
    }

    // a non-zero return from Cuda_BinAtoms triggers another sizing pass
  } while(Cuda_BinAtoms(&cuda->shared_data, slist));

  int maxneighbors = slist->maxneighbors;

  // Refresh the exclusion tables only when one of the counts differs from
  // the values recorded in the shared list.
  if((nex_type != slist->nex_type) ||
      (nex_group != slist->nex_group) ||
      (nex_mol != slist->nex_mol)) {
    slist->nex_type = nex_type;
    slist->nex_group = nex_group;
    slist->nex_mol = nex_mol;

    if(nex_type) {
      delete clist->cu_ex_type;
      clist->cu_ex_type = new cCudaData<int, int, x> (&ex_type[0][0], &slist->ex_type, (atom->ntypes + 1) * (atom->ntypes + 1));
      clist->cu_ex_type->upload();
    }

    if(nex_group) {
      delete clist->cu_ex1_bit;
      clist->cu_ex1_bit = new cCudaData<int, int, x> (ex1_bit, &slist->ex1_bit, nex_group);
      clist->cu_ex1_bit->upload();

      delete clist->cu_ex2_bit;
      clist->cu_ex2_bit = new cCudaData<int, int, x> (ex2_bit, &slist->ex2_bit, nex_group);
      clist->cu_ex2_bit->upload();
    }

    if(nex_mol) {
      delete clist->cu_ex_mol_bit;
      clist->cu_ex_mol_bit = new cCudaData<int, int, x> (ex_mol_bit, &slist->ex_mol_bit, nex_mol);
      clist->cu_ex_mol_bit->upload();
    }
  }

  int overflow = 0;

  // Build the list; a negative return value means the per-atom neighbor
  // capacity was too small, so enlarge the storage and try again.
  do {
    clist->grow_device();
    slist->cutneighsq = cutneighsq;
    slist->maxneighbors = maxneighbors;
    slist->inum = list->inum = nlocal;

    if(cuda->shared_data.overlap_comm) {
      clist->inum_border = 0;
      clist->cu_inum_border->upload();
    }

    cuda->shared_data.atom.nall = nall;

    overflow = Cuda_NeighborBuildFullBin(&cuda->shared_data, slist);

    if(overflow < 0) {
      // grow by 32; if -overflow exceeds that, round it up to a multiple of 32
      // NOTE(review): -overflow looks like the required neighbor count -- confirm
      maxneighbors += 32;

      if(-overflow > maxneighbors) maxneighbors = ((-overflow + 37) / 32) * 32;

      delete clist->cu_neighbors;
      delete [] clist->neighbors;
      clist->neighbors = new int[slist->maxlocal * maxneighbors];
      slist->maxneighbors = maxneighbors;
      clist->cu_neighbors = new cCudaData<int, int, x> (clist->neighbors, &slist->neighbors, slist->maxlocal * maxneighbors);

      if(cuda->shared_data.overlap_comm) {
        // re-create the device buffers for the new capacity
        clist->dev_free();
        clist->dev_alloc();
      }
    }

    if(cuda->shared_data.overlap_comm) {
      clist->cu_inum_border->download();
      slist->inum_border2 = clist->inum_border;
    }
  } while(overflow < 0);

  // bring the per-atom neighbor counts and the atom index list back to the host
  clist->cu_numneigh->download();
  clist->cu_ilist->download();
  cuda->shared_data.atom.update_neigh = 2;

  MYDBG(printf(" # CUDA::NeighFullBinCuda ... end\n");)
}
/* ----------------------------------------------------------------------
   N^2 full neighbor list build on the GPU -- no longer implemented
   prints a notice and leaves the list untouched
   the former implementation is kept below, commented out, for reference
------------------------------------------------------------------------- */
void NeighborCuda::full_nsq_cuda(NeighList* list)
{
  printf("Full_Nsq cuda neighbor list build is not implemented anymore.\n");

  /* legacy implementation (disabled):

  MYDBG(printf(" # CUDA::NeighFullNSQCuda ... start\n");)
  int nlocal = atom->nlocal;
  int nall = nlocal + atom->nghost;

  if(cuda->cu_xhold) cuda->cu_xhold->upload();

  if(not list->cuda_list) cuda->registerNeighborList(list);
  list->cuda_list->build_cuda=true;
  int maxneighbors=list->cuda_list->sneighlist.maxneighbors;
  int neigh_lists_per_page=pgsize/maxneighbors;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int **pages = list->pages;

  int overflow = 0;
  int inum = 0;
  int npage = 0;
  int npnt = 0;
  do
  {
    npage=0;
    npnt=0;
    inum=0;
    overflow=0;
    neigh_lists_per_page=pgsize/maxneighbors;
    npage=(2*nlocal*maxneighbors-1)/pgsize;
    while(npage>list->maxpage) list->add_pages();
    pages = list->pages;
    npage=0;
    list->cuda_list->sneighlist.neigh_lists_per_page=pgsize/maxneighbors;
    list->cuda_list->grow_device();
    list->cuda_list->sneighlist.cutneighsq=cutneighsq;
    list->cuda_list->sneighlist.maxneighbors=maxneighbors;
    list->cuda_list->sneighlist.inum = list->inum = nlocal;

    cuda->shared_data.atom.nall=nall;
    Cuda_NeighborReBuildFirstneigh(&cuda->shared_data, &list->cuda_list->sneighlist);
    overflow= not Cuda_NeighborBuildFullNsq(&cuda->shared_data, &list->cuda_list->sneighlist);

    if(overflow) maxneighbors+=32;
  }
  while(overflow);

  if(not cudable) list->cuda_list->nl_download();
  MYDBG(printf(" # CUDA::NeighFullNSQCuda ... end\n");)
  */
}
Event Timeline
Log In to Comment