diff --git a/src/USER-CUDA/atom_vec_angle_cuda.cpp b/src/USER-CUDA/atom_vec_angle_cuda.cpp
index 054bb68e0..417909473 100644
--- a/src/USER-CUDA/atom_vec_angle_cuda.cpp
+++ b/src/USER-CUDA/atom_vec_angle_cuda.cpp
@@ -1,476 +1,476 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <cstdlib>
 #include <cstdio>
 #include <cstring>
 #include "atom_vec_angle_cuda.h"
 #include "comm_cuda_cu.h"
 #include "atom_vec_angle_cuda_cu.h"
 #include "atom.h"
 #include "domain.h"
 #include "modify.h"
 #include "fix.h"
 #include "memory.h"
 #include "error.h"
 #include "cuda.h"
 #include "universe.h"
 #include "comm.h"
 
 using namespace LAMMPS_NS;
 
 #define DELTA 10000
 #define BUFFACTOR 1.5
 #define BUFEXTRA 1000
 #define NCUDAEXCHANGE 12 //nextra x y z vx vy vz tag type mask image molecule
 
 #define BUF_FLOAT double
 /* ---------------------------------------------------------------------- */
 
 AtomVecAngleCuda::AtomVecAngleCuda(LAMMPS *lmp) : AtomVecAngle(lmp)
 {
    cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
    maxsend=0;
    cudable=true;
    cuda_init_done=false;
    max_nsend=0;
    cu_copylist=NULL;
    copylist=NULL;
    copylist2=NULL;
 }
 
 void AtomVecAngleCuda::grow_copylist(int new_max_nsend)
 {
   max_nsend=new_max_nsend;
   delete cu_copylist;
   delete [] copylist2;
   if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist);
   copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
   copylist2 = new int[max_nsend];
   cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
 }
 
 void AtomVecAngleCuda::grow_send(int n,double** buf_send,int flag)  //need to be able to grow the comm send_buffer since the array sahll be copied from the gpu in whole
 {
   int old_maxsend=*maxsend+BUFEXTRA;
   *maxsend = static_cast<int> (BUFFACTOR * n);
   if (flag)
   {
     if(cuda->pinned)
     {
       double* tmp = new double[old_maxsend];
       memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double));
       if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
       *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
       memcpy(*buf_send,tmp,old_maxsend*sizeof(double));
       delete [] tmp;
     }
     else
     {
      *buf_send = (double *)
       memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double),
                        "comm:buf_send");
     }
   }
   else {
    if(cuda->pinned)
     {
       if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
       *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
     }
     else
     {
       memory->sfree(*buf_send);
       *buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double),
                                           "comm:buf_send");
     }
   }
 }
 
 void AtomVecAngleCuda::grow_both(int n)
 {
   if(cuda->finished_setup)
   {
     cuda->cu_special->upload();
     cuda->cu_nspecial->upload();
     cuda->downloadAll();
   }
   AtomVecAngle::grow(n);
   if(cuda->finished_setup)
   {
     cuda->checkResize();
     cuda->uploadAll();
   }
 }
 
 int AtomVecAngleCuda::pack_comm(int n, int* iswap, double *buf,
                              int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
 {
   if(not cuda->finished_setup || cuda->oncpu)
           return AtomVecAngle::pack_comm(n,iswap,buf,pbc_flag,pbc);
 
         int m = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
         if((sizeof(X_FLOAT)!=sizeof(double)) && m)
           m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
         return m;
 }
 
 int AtomVecAngleCuda::pack_comm_vel(int n, int* iswap, double *buf,
                              int pbc_flag, int *pbc) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
 {
   if(not cuda->finished_setup || cuda->oncpu)
           return AtomVecAngle::pack_comm_vel(n,iswap,buf,pbc_flag,pbc);
 
         int m = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
         if((sizeof(X_FLOAT)!=sizeof(double)) && m)
           m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
         return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecAngleCuda::unpack_comm(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
 {
   if(not cuda->finished_setup || cuda->oncpu)
            {AtomVecAngle::unpack_comm(n,first,buf); return;}
 
   Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf);
 }
 
 void AtomVecAngleCuda::unpack_comm_vel(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only positions are exchanged
 {
   if(not cuda->finished_setup || cuda->oncpu)
            {AtomVecAngle::unpack_comm_vel(n,first,buf); return;}
 
   Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecAngleCuda::pack_reverse(int n, int first, double *buf) //usually this should not be called since comm->communicate handles the communication if only forces are exchanged
 {
   if(not cuda->finished_setup || cuda->oncpu)
           return AtomVecAngle::pack_reverse(n,first,buf);
 
   int i,m,last;
   cuda->cu_f->download();
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     buf[m++] = f[i][0];
     buf[m++] = f[i][1];
     buf[m++] = f[i][2];
   }
   cuda->cu_f->upload();
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecAngleCuda::unpack_reverse(int n, int *list, double *buf)//usually this should not be called since comm->communicate handles the communication if only forces are exchanged
 {
   if(not cuda->finished_setup || cuda->oncpu)
           {AtomVecAngle::unpack_reverse(n,list,buf); return;}
 
   int i,j,m;
 
   m = 0;
   cuda->cu_f->download();
   for (i = 0; i < n; i++) {
     j = list[i];
     f[j][0] += buf[m++];
     f[j][1] += buf[m++];
     f[j][2] += buf[m++];
   }
   cuda->cu_f->upload();
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecAngleCuda::pack_border(int n, int *iswap, double *buf,
                                int pbc_flag, int *pbc)
 {
  if(not cuda->finished_setup || cuda->oncpu)
           return AtomVecAngle::pack_border(n,iswap,buf,pbc_flag,pbc);
 
         int m = Cuda_AtomVecAngleCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
   return m;
 }
 
 int AtomVecAngleCuda::pack_border_vel(int n, int *iswap, double *buf,
                                int pbc_flag, int *pbc)
 {
  if(not cuda->finished_setup || cuda->oncpu)
           return AtomVecAngle::pack_border_vel(n,iswap,buf,pbc_flag,pbc);
 
         int m = Cuda_AtomVecAngleCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
 
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecAngleCuda::unpack_border(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
            {AtomVecAngle::unpack_border(n,first,buf); return;}
   while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data
   {
           grow_both(0);
   }
   int flag=Cuda_AtomVecAngleCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf);
   if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
 }
 
 void AtomVecAngleCuda::unpack_border_vel(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
            {AtomVecAngle::unpack_border_vel(n,first,buf); return;}
   while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax) //ensure there is enough space on device to unpack data
   {
           grow_both(0);
   }
   int flag=Cuda_AtomVecAngleCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf);
   if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
 }
 
 /* ----------------------------------------------------------------------
    pack data for atom I for sending to another proc
    xyz must be 1st 3 values, so comm::exchange() can test on them
 ------------------------------------------------------------------------- */
 
 
 int AtomVecAngleCuda::pack_exchange(int dim, double *buf)
 {
   if(cuda->oncpu)
           return AtomVecAngle::pack_exchange(dim,buf);
 
   if(not cuda_init_done||domain->box_change)
   {
           Cuda_AtomVecAngleCuda_Init(&cuda->shared_data);
           cuda_init_done=true;
   }
   double** buf_pointer=(double**) buf;
   if(*maxsend<atom->nghost || *buf_pointer==NULL)
   {
           grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
           *maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
   }
 
   if(max_nsend==0) grow_copylist(200);
 
   int nsend_atoms = Cuda_AtomVecAngleCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
 
   if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
   if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
   {
           grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
           Cuda_AtomVecAngleCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
   }
 
   int nlocal=atom->nlocal-nsend_atoms;
 
   for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
   for(int j=1;j<nsend_atoms+1;j++)
   {
           int i = static_cast <int> ((*buf_pointer)[j]);
           if(i>=nlocal) copylist2[i-nlocal]=-1;
   }
 
   int actpos=0;
   for(int j=1;j<nsend_atoms+1;j++)
   {
           int i = static_cast <int> ((*buf_pointer)[j]);
           if(i<nlocal)
           {
             while(copylist2[actpos]==-1) actpos++;
               copylist[j-1]=nlocal+actpos;
             actpos++;
           }
   }
   cu_copylist->upload();
 
   cuda->shared_data.atom.nlocal=nlocal;
 
   int m = Cuda_AtomVecAngleCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
 
   timespec time1,time2;
   clock_gettime(CLOCK_REALTIME,&time1);
 
   double* buf_p=*buf_pointer;
   for(int j=0;j<nsend_atoms;j++)
   {
     int i=static_cast <int> (buf_p[j+1]);
     int nextra=0;
     int k;
     buf_p[m++] = num_bond[i];
     for (k = 0; k < num_bond[i]; k++) {
       buf_p[m++] = bond_type[i][k];
       buf_p[m++] = bond_atom[i][k];
     }
     nextra+=2*num_bond[i]+1;
     if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
 
     buf_p[m++] = num_angle[i];
     for (k = 0; k < num_angle[i]; k++) {
       buf_p[m++] = angle_type[i][k];
       buf_p[m++] = angle_atom1[i][k];
       buf_p[m++] = angle_atom2[i][k];
       buf_p[m++] = angle_atom3[i][k];
     }
     nextra+=4*num_angle[i]+1;
     if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
 
     buf_p[m++] = nspecial[i][0];
     buf_p[m++] = nspecial[i][1];
     buf_p[m++] = nspecial[i][2];
     for (k = 0; k < nspecial[i][2]; k++) buf_p[m++] = special[i][k];
     nextra+=nspecial[i][2]+3;
     if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
 
     if (atom->nextra_grow)
       for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       {
         int dm= modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf_p[m]);
         m+=dm;
-                  nextra+=dm;
-                  if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,delflag);
+            nextra+=dm; //values packed by this fix count toward the per-atom total stored in buf[j+1]
+            if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,1); //last argument is the delflag; 1 matches the AtomVecAngle::copy() call below
             if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
       }
 
     if(i<nlocal)AtomVecAngle::copy(copylist[j],i,1);
     (*buf_pointer)[j+1] = nextra;
   }
 
           clock_gettime(CLOCK_REALTIME,&time2);
           cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
 
   (*buf_pointer)[0] = nsend_atoms;
   atom->nlocal-=nsend_atoms;
   cuda->shared_data.atom.update_nlocal=2;
  //printf("End Pack Exchange\n");
   if(m==1) return 0;
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecAngleCuda::unpack_exchange(double *buf)
 {
 // printf("Begin UnPack Exchange\n");
   if(cuda->oncpu)
           return AtomVecAngle::unpack_exchange(buf);
 
   double *sublo,*subhi;
   int dim=cuda->shared_data.exchange_dim;
   if(domain->box_change)
   Cuda_AtomVecAngleCuda_Init(&cuda->shared_data);
   if (domain->triclinic == 0) {
     sublo = domain->sublo;
     subhi = domain->subhi;
   } else {
     sublo = domain->sublo_lamda;
     subhi = domain->subhi_lamda;
   }
 
   int mfirst=0;
   for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++)
   {
   int nlocal = atom->nlocal;
   int nsend_atoms=static_cast<int> (buf[0]);
   if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
 
   if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost); //ensure there is enough space on device to unpack data
   int naccept = Cuda_AtomVecAngleCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
   cu_copylist->download();
   int m = nsend_atoms*NCUDAEXCHANGE + 1;
   nlocal+=naccept;
 
   timespec time1,time2;
   clock_gettime(CLOCK_REALTIME,&time1);
 
   for(int j=0;j<nsend_atoms;j++)
   {
     if(copylist[j]>-1)
     {
            int k;
           int i=copylist[j];
       num_bond[i] = static_cast<int> (buf[m++]);
       for (k = 0; k < num_bond[i]; k++) {
             bond_type[i][k] = static_cast<int> (buf[m++]);
             bond_atom[i][k] = static_cast<int> (buf[m++]);
             }
 
             num_angle[i] = static_cast<int> (buf[m++]);
             for (k = 0; k < num_angle[i]; k++) {
             angle_type[i][k] = static_cast<int> (buf[m++]);
             angle_atom1[i][k] = static_cast<int> (buf[m++]);
             angle_atom2[i][k] = static_cast<int> (buf[m++]);
             angle_atom3[i][k] = static_cast<int> (buf[m++]);
             }
 
             nspecial[i][0] = static_cast<int> (buf[m++]);
             nspecial[i][1] = static_cast<int> (buf[m++]);
             nspecial[i][2] = static_cast<int> (buf[m++]);
             for (k = 0; k < nspecial[i][2]; k++)
             special[i][k] = static_cast<int> (buf[m++]);
 
             if (atom->nextra_grow)
         for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
                                       m += modify->fix[atom->extra_grow[iextra]]->
                                         unpack_exchange(i,&buf[m]);
 
     }
     else
     m+=static_cast <int> (buf[j+1]);
   }
 
           clock_gettime(CLOCK_REALTIME,&time2);
           cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
 
   cuda->shared_data.atom.nlocal=nlocal;
   cuda->shared_data.atom.update_nlocal=2;
   atom->nlocal=nlocal;
   mfirst+=m;
   buf=&buf[m];
   }
   return mfirst;
 }
diff --git a/src/USER-CUDA/atom_vec_atomic_cuda.cpp b/src/USER-CUDA/atom_vec_atomic_cuda.cpp
index 1a4dbad73..bf8865b5c 100644
--- a/src/USER-CUDA/atom_vec_atomic_cuda.cpp
+++ b/src/USER-CUDA/atom_vec_atomic_cuda.cpp
@@ -1,404 +1,404 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <cstdlib>
 #include <cstdio>
 #include <cstring>
 #include "atom_vec_atomic_cuda.h"
 #include "comm_cuda_cu.h"
 #include "atom_vec_atomic_cuda_cu.h"
 #include "atom.h"
 #include "domain.h"
 #include "modify.h"
 #include "fix.h"
 #include "memory.h"
 #include "error.h"
 #include "cuda.h"
 #include "comm.h"
 
 using namespace LAMMPS_NS;
 
 #define DELTA 10000
 #define BUFFACTOR 1.5
 #define BUFEXTRA 1000
 #define NCUDAEXCHANGE 11 //nextra x y z vx vy vz tag type mask image
 
 
 #define BUF_FLOAT double
 /* ---------------------------------------------------------------------- */
 
 AtomVecAtomicCuda::AtomVecAtomicCuda(LAMMPS *lmp) : AtomVecAtomic(lmp)
 {
    cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
    maxsend=0;
    cudable=true;
    cuda_init_done=false;
    max_nsend=0;
    cu_copylist=NULL;
    copylist=NULL;
    copylist2=NULL;
 }
 
 void AtomVecAtomicCuda::grow_copylist(int new_max_nsend)
 {
   max_nsend=new_max_nsend;
   delete cu_copylist;
   delete [] copylist2;
   if(copylist) CudaWrapper_FreePinnedHostData((void*) copylist);
   copylist = (int*) CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
   copylist2 = new int[max_nsend];
   cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
 }
 
 void AtomVecAtomicCuda::grow_send(int n,double** buf_send,int flag)
 {
   int old_maxsend=*maxsend+BUFEXTRA;
   *maxsend = static_cast<int> (BUFFACTOR * n);
   if (flag)
   {
     if(cuda->pinned)
     {
       double* tmp = new double[old_maxsend];
       memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double));
       if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
       *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
       memcpy(*buf_send,tmp,old_maxsend*sizeof(double));
       delete [] tmp;
     }
     else
     {
      *buf_send = (double *)
       memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double),
                        "comm:buf_send");
     }
   }
   else {
    if(cuda->pinned)
     {
       if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
       *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
     }
     else
     {
       memory->sfree(*buf_send);
       *buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double),
                                           "comm:buf_send");
     }
   }
 }
 
 void AtomVecAtomicCuda::grow_both(int n)
 {
   if(cuda->finished_setup)
   cuda->downloadAll();
   AtomVecAtomic::grow(n);
   if(cuda->finished_setup)
   {
     cuda->checkResize();
     cuda->uploadAll();
   }
 }
 
 int AtomVecAtomicCuda::pack_comm(int n, int* iswap, double *buf,
                              int pbc_flag, int *pbc)
 {
   if(not cuda->finished_setup || cuda->oncpu)
           return AtomVecAtomic::pack_comm(n,iswap,buf,pbc_flag,pbc);
 
         int m = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
         if((sizeof(X_FLOAT)!=sizeof(double)) && m)
           m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
         return m;
 }
 
 int AtomVecAtomicCuda::pack_comm_vel(int n, int* iswap, double *buf,
                              int pbc_flag, int *pbc)
 {
   if(not cuda->finished_setup || cuda->oncpu)
           return AtomVecAtomic::pack_comm_vel(n,iswap,buf,pbc_flag,pbc);
 
         int m = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
         if((sizeof(X_FLOAT)!=sizeof(double)) && m)
           m=(m+1)*sizeof(X_FLOAT)/sizeof(double);
         return m;
 }
 /* ---------------------------------------------------------------------- */
 
 void AtomVecAtomicCuda::unpack_comm(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
            {AtomVecAtomic::unpack_comm(n,first,buf); return;}
 
   Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf);
 }
 
 void AtomVecAtomicCuda::unpack_comm_vel(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
            {AtomVecAtomic::unpack_comm_vel(n,first,buf); return;}
 
   Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf);
 }
 /* ---------------------------------------------------------------------- */
 
 int AtomVecAtomicCuda::pack_reverse(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
           return AtomVecAtomic::pack_reverse(n,first,buf);
 
   int i,m,last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     buf[m++] = f[i][0];
     buf[m++] = f[i][1];
     buf[m++] = f[i][2];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void AtomVecAtomicCuda::unpack_reverse(int n, int *list, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
           {AtomVecAtomic::unpack_reverse(n,list,buf); return;}
 
   int i,j,m;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     f[j][0] += buf[m++];
     f[j][1] += buf[m++];
     f[j][2] += buf[m++];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecAtomicCuda::pack_border(int n, int *iswap, double *buf,
                                int pbc_flag, int *pbc)
 {
   if(not cuda->finished_setup || cuda->oncpu)
           return AtomVecAtomic::pack_border(n,iswap,buf,pbc_flag,pbc);
 
   int m = Cuda_AtomVecAtomicCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
 
   return m;
 }
 
 int AtomVecAtomicCuda::pack_border_vel(int n, int *iswap, double *buf,
                                int pbc_flag, int *pbc)
 {
   if(not cuda->finished_setup || cuda->oncpu)
           return AtomVecAtomic::pack_border_vel(n,iswap,buf,pbc_flag,pbc);
 
   int m = Cuda_AtomVecAtomicCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
 
   return m;
 }
 /* ---------------------------------------------------------------------- */
 
 void AtomVecAtomicCuda::unpack_border(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
            {AtomVecAtomic::unpack_border(n,first,buf); return;}
   while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax)
   {
           grow_both(0);
   }
   int flag=Cuda_AtomVecAtomicCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf);
   if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
 
 }
 
 void AtomVecAtomicCuda::unpack_border_vel(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
            {AtomVecAtomic::unpack_border_vel(n,first,buf); return;}
   while(atom->nghost+atom->nlocal+n>=cuda->shared_data.atom.nmax)
   {
           grow_both(0);
   }
   int flag=Cuda_AtomVecAtomicCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf);
   if(flag) {printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");}
 }
 /* ----------------------------------------------------------------------
    pack data for atom I for sending to another proc
    xyz must be 1st 3 values, so comm::exchange() can test on them
 ------------------------------------------------------------------------- */
 
 
 int AtomVecAtomicCuda::pack_exchange(int dim, double *buf)
 {
   if(cuda->oncpu)
           return AtomVecAtomic::pack_exchange(dim,buf);
 
   if(not cuda_init_done||domain->box_change)
   {
           Cuda_AtomVecAtomicCuda_Init(&cuda->shared_data);
           cuda_init_done=true;
   }
   double** buf_pointer=(double**) buf;
   if(*maxsend<atom->nghost || *buf_pointer==NULL)
   {
           grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
           *maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
   }
 
   if(max_nsend==0) grow_copylist(200);
 
   int nsend_atoms = Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
 
   if(nsend_atoms>max_nsend) {grow_copylist(nsend_atoms+100);}
   if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
   {
           grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
            Cuda_AtomVecAtomicCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
   }
 
   int nlocal=atom->nlocal-nsend_atoms;
 
   for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
   for(int j=1;j<nsend_atoms+1;j++)
   {
           int i = static_cast <int> ((*buf_pointer)[j]);
           if(i>=nlocal) copylist2[i-nlocal]=-1;
   }
 
   int actpos=0;
   for(int j=1;j<nsend_atoms+1;j++)
   {
           int i = static_cast <int> ((*buf_pointer)[j]);
           if(i<nlocal)
           {
             while(copylist2[actpos]==-1) actpos++;
               copylist[j-1]=nlocal+actpos;
             actpos++;
           }
   }
   cu_copylist->upload();
 
   cuda->shared_data.atom.nlocal=nlocal;
 
   int m = Cuda_AtomVecAtomicCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
   if (atom->nextra_grow)
   for(int j=0;j<nsend_atoms;j++)
   {
       int i=static_cast <int> ((*buf_pointer)[j+1]);
       int nextra=0;
       for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
 
         int dm = modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&((*buf_pointer)[m]));
         m+=dm;
                   nextra+=dm;
-                  if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,delflag);
+                  if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,1); //last argument is the delflag, fixed to 1 here - NOTE(review): confirm against Fix::copy_arrays
         if(m>*maxsend)  grow_send(m,buf_pointer,1);
       }
       (*buf_pointer)[j+1] = nextra;
 
   }
 
   (*buf_pointer)[0] = nsend_atoms;
   atom->nlocal-=nsend_atoms;
   cuda->shared_data.atom.update_nlocal=2;
 
   if(m==1) return 0;//m is at least 1 in cuda since buf[0] contains number of atoms
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int AtomVecAtomicCuda::unpack_exchange(double *buf)
 {
   //printf("Unpack Begin\n");
   if(cuda->oncpu)
           return AtomVecAtomic::unpack_exchange(buf);
 
   double *sublo,*subhi;
 
   int dim=cuda->shared_data.exchange_dim;
   if(domain->box_change)
   Cuda_AtomVecAtomicCuda_Init(&cuda->shared_data);
   if (domain->triclinic == 0) {
     sublo = domain->sublo;
     subhi = domain->subhi;
   } else {
     sublo = domain->sublo_lamda;
     subhi = domain->subhi_lamda;
   }
 
   int mfirst=0;
   for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++)
   {
   int nlocal = atom->nlocal;
 
   int nsend_atoms=static_cast<int> (buf[0]);
   if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
 
   if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost);
   int naccept = Cuda_AtomVecAtomicCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
   cu_copylist->download();
   int m = nsend_atoms*NCUDAEXCHANGE + 1;
   nlocal+=naccept;
   if (atom->nextra_grow)
   for(int j=0;j<nsend_atoms;j++)
   {
     if(copylist[j]>-1)
     {
                     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
                                       m += modify->fix[atom->extra_grow[iextra]]->
                                         unpack_exchange(copylist[j],&buf[m]);
     }
     else
     {
       m+=static_cast <int> (buf[j+1]);
     }
   }
   cuda->shared_data.atom.nlocal=nlocal;
   if(atom->nlocal!=nlocal)
   cuda->shared_data.atom.update_nlocal=2;
   atom->nlocal=nlocal;
   mfirst+=m;
   buf=&buf[m];
   }
   return mfirst;
 }
diff --git a/src/USER-CUDA/atom_vec_charge_cuda.cpp b/src/USER-CUDA/atom_vec_charge_cuda.cpp
index 86736c79e..f9cb3673e 100644
--- a/src/USER-CUDA/atom_vec_charge_cuda.cpp
+++ b/src/USER-CUDA/atom_vec_charge_cuda.cpp
@@ -1,403 +1,403 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <cstdlib>
 #include <cstdio>
 #include <cstring>
 #include "atom_vec_charge_cuda.h"
 #include "comm_cuda_cu.h"
 #include "atom_vec_charge_cuda_cu.h"
 #include "atom.h"
 #include "domain.h"
 #include "modify.h"
 #include "fix.h"
 #include "memory.h"
 #include "error.h"
 #include "cuda.h"
 #include "comm.h"
 
 using namespace LAMMPS_NS;
 
 #define DELTA 10000
 #define BUFFACTOR 1.5
 #define BUFEXTRA 1000
 #define NCUDAEXCHANGE 12 //nextra x y z vx vy vz tag type mask image q
 
 #define BUF_FLOAT double
 /* ---------------------------------------------------------------------- */
 
 AtomVecChargeCuda::AtomVecChargeCuda(LAMMPS *lmp) : AtomVecCharge(lmp)
 {
   // a /cuda atom style requires the Cuda object owned by the LAMMPS instance
   cuda = lmp->cuda;
   if(!cuda)
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
   // send-buffer size pointer and state flags
   maxsend = 0;
   cudable = true;
   cuda_init_done = false;
 
   // copy lists start out empty; grow_copylist() allocates them
   max_nsend = 0;
   cu_copylist = NULL;
   copylist = NULL;
   copylist2 = NULL;
 }
 
 // Discard the current copy lists and allocate fresh ones holding
 // new_max_nsend ints each; previous contents are not preserved.
 void AtomVecChargeCuda::grow_copylist(int new_max_nsend)
 {
   max_nsend = new_max_nsend;
 
   // drop the old device wrapper and both host lists
   delete cu_copylist;
   delete [] copylist2;
   if(copylist != NULL)
   {
     CudaWrapper_FreePinnedHostData((void*) copylist);
   }
 
   // copylist lives in pinned host memory and is wrapped by cu_copylist
   void* pinned = CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
   copylist = (int*) pinned;
   copylist2 = new int[max_nsend];
   cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
 }
 
 void AtomVecChargeCuda::grow_send(int n,double** buf_send,int flag)  //need to be able to grow the comm send_buffer since the array sahll be copied from the gpu in whole
 {
   int old_maxsend=*maxsend+BUFEXTRA;
   *maxsend = static_cast<int> (BUFFACTOR * n);
   if (flag)
   {
     if(cuda->pinned)
     {
       double* tmp = new double[old_maxsend];
       memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double));
       if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
       *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
       memcpy(*buf_send,tmp,old_maxsend*sizeof(double));
       delete [] tmp;
     }
     else
     {
      *buf_send = (double *)
       memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double),
                        "comm:buf_send");
     }
   }
   else {
    if(cuda->pinned)
     {
       if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
       *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
     }
     else
     {
       memory->sfree(*buf_send);
       *buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double),
                                           "comm:buf_send");
     }
   }
 }
 
 // Grow the per-atom arrays through AtomVecCharge::grow(n). Once the Cuda
 // setup has finished, all data is downloaded before the grow and
 // resized/uploaded again afterwards.
 void AtomVecChargeCuda::grow_both(int n)
 {
   if(cuda->finished_setup)
   {
     cuda->downloadAll();
   }
 
   AtomVecCharge::grow(n);
 
   if(not cuda->finished_setup) return;
   cuda->checkResize();
   cuda->uploadAll();
 }
 
 // Pack positions for forward communication. Usually this should not be
 // called since comm->communicate handles the communication if only
 // positions are exchanged.
 int AtomVecChargeCuda::pack_comm(int n, int* iswap, double *buf,
                              int pbc_flag, int *pbc)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     int nwords = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
     // a nonzero count is rescaled by sizeof(X_FLOAT)/sizeof(double) when the types differ
     if(nwords && (sizeof(X_FLOAT)!=sizeof(double)))
       nwords=(nwords+1)*sizeof(X_FLOAT)/sizeof(double);
     return nwords;
   }
 
   // before setup has finished, or in host mode, use the CPU version
   return AtomVecCharge::pack_comm(n,iswap,buf,pbc_flag,pbc);
 }
 
 // Pack positions and velocities for forward communication. Usually this
 // should not be called since comm->communicate handles the communication
 // if only positions are exchanged.
 int AtomVecChargeCuda::pack_comm_vel(int n, int* iswap, double *buf,
                              int pbc_flag, int *pbc)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     int nwords = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
     // a nonzero count is rescaled by sizeof(X_FLOAT)/sizeof(double) when the types differ
     if(nwords && (sizeof(X_FLOAT)!=sizeof(double)))
       nwords=(nwords+1)*sizeof(X_FLOAT)/sizeof(double);
     return nwords;
   }
 
   // before setup has finished, or in host mode, use the CPU version
   return AtomVecCharge::pack_comm_vel(n,iswap,buf,pbc_flag,pbc);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Unpack forward-communication data. Usually this should not be called
 // since comm->communicate handles the communication if only positions
 // are exchanged.
 void AtomVecChargeCuda::unpack_comm(int n, int first, double *buf)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf);
     return;
   }
 
   // CPU fallback
   AtomVecCharge::unpack_comm(n,first,buf);
 }
 
 // Unpack forward-communication data including velocities. Usually this
 // should not be called since comm->communicate handles the communication
 // if only positions are exchanged.
 void AtomVecChargeCuda::unpack_comm_vel(int n, int first, double *buf)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf);
     return;
   }
 
   // CPU fallback
   AtomVecCharge::unpack_comm_vel(n,first,buf);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pack the forces of atoms first..first+n-1 into buf and return the number
 // of values written. Usually this should not be called since
 // comm->communicate handles the communication if only forces are exchanged.
 int AtomVecChargeCuda::pack_reverse(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
     return AtomVecCharge::pack_reverse(n,first,buf);
 
   // bring the force array to the host before reading it
   cuda->cu_f->download();
 
   int count = 0;
   const int stop = first + n;
   for (int iatom = first; iatom < stop; iatom++)
     for (int d = 0; d < 3; d++)
       buf[count++] = f[iatom][d];
 
   cuda->cu_f->upload();
   return count;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Add the force contributions stored in buf onto the atoms named in list.
 // Usually this should not be called since comm->communicate handles the
 // communication if only forces are exchanged.
 void AtomVecChargeCuda::unpack_reverse(int n, int *list, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
   {
     AtomVecCharge::unpack_reverse(n,list,buf);
     return;
   }
 
   int pos = 0;
   // accumulate on the host copy of f, then push it back
   cuda->cu_f->download();
   for (int k = 0; k < n; k++) {
     const int target = list[k];
     for (int d = 0; d < 3; d++)
       f[target][d] += buf[pos++];
   }
   cuda->cu_f->upload();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pack border atoms; returns the count reported by the Cuda routine, or
 // by the CPU version when setup is unfinished or host mode is active.
 int AtomVecChargeCuda::pack_border(int n, int *iswap, double *buf,
                                int pbc_flag, int *pbc)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     int packed = Cuda_AtomVecChargeCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
     return packed;
   }
 
   return AtomVecCharge::pack_border(n,iswap,buf,pbc_flag,pbc);
 }
 
 // Pack border atoms including velocities; returns the count reported by
 // the Cuda routine, or by the CPU version when setup is unfinished or
 // host mode is active.
 int AtomVecChargeCuda::pack_border_vel(int n, int *iswap, double *buf,
                                int pbc_flag, int *pbc)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     int packed = Cuda_AtomVecChargeCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
     return packed;
   }
 
   return AtomVecCharge::pack_border_vel(n,iswap,buf,pbc_flag,pbc);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Unpack n border atoms starting at index first.
 void AtomVecChargeCuda::unpack_border(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
   {
     AtomVecCharge::unpack_border(n,first,buf);
     return;
   }
 
   // ensure there is enough space on device to unpack data
   for(;;)
   {
     if(atom->nghost+atom->nlocal+n<cuda->shared_data.atom.nmax) break;
     grow_both(0);
   }
 
   const int failed=Cuda_AtomVecChargeCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf);
   if(failed)
   {
     printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
   }
 }
 
 // Unpack n border atoms including velocities, starting at index first.
 void AtomVecChargeCuda::unpack_border_vel(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
   {
     AtomVecCharge::unpack_border_vel(n,first,buf);
     return;
   }
 
   // ensure there is enough space on device to unpack data
   for(;;)
   {
     if(atom->nghost+atom->nlocal+n<cuda->shared_data.atom.nmax) break;
     grow_both(0);
   }
 
   const int failed=Cuda_AtomVecChargeCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf);
   if(failed)
   {
     printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
   }
 }
 
 /* ----------------------------------------------------------------------
    pack data for atom I for sending to another proc
    xyz must be 1st 3 values, so comm::exchange() can test on them
 ------------------------------------------------------------------------- */
 
 
 int AtomVecChargeCuda::pack_exchange(int dim, double *buf)
 {
   // Packs all atoms selected for exchange along dimension dim in one pass.
   // Returns the number of doubles packed, or 0 when only the count slot was written.
   // In the non-oncpu path buf is reinterpreted as double** (see the cast below).
   // NOTE(review): presumably the caller passes the address of its send-buffer
   // pointer so the buffer can be reallocated here - confirm against the caller.
   if(cuda->oncpu)
           return AtomVecCharge::pack_exchange(dim,buf);
 
   // run the device-side init once, and again whenever domain->box_change is set
   if(not cuda_init_done||domain->box_change)
   {
           Cuda_AtomVecChargeCuda_Init(&cuda->shared_data);
           cuda_init_done=true;
   }
   double** buf_pointer=(double**) buf;
   // (re)allocate the send buffer if it is missing or smaller than nghost
   if(*maxsend<atom->nghost || *buf_pointer==NULL)
   {
           grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
           *maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
   }
 
   if(max_nsend==0) grow_copylist(200);
 
   // returns the number of atoms to send; their indices are read back
   // from (*buf_pointer)[1..nsend_atoms] below
   int nsend_atoms = Cuda_AtomVecChargeCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
 
   if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
   // buffer too small for the fixed-size part: grow it and rebuild the list
   if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
   {
           grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
           Cuda_AtomVecChargeCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
   }
 
   // nlocal after the leaving atoms are removed
   int nlocal=atom->nlocal-nsend_atoms;
 
   // copylist2[k] == -1 marks slot nlocal+k (in the last nsend_atoms slots)
   // as holding an atom that is itself leaving
   for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
   for(int j=1;j<nsend_atoms+1;j++)
   {
           int i = static_cast <int> ((*buf_pointer)[j]);
           if(i>=nlocal) copylist2[i-nlocal]=-1;
   }
 
   // for every leaving atom with index below the new nlocal, record in copylist
   // the next slot >= nlocal that is not marked -1; copylist entries for atoms
   // with i >= nlocal are not written here
   int actpos=0;
   for(int j=1;j<nsend_atoms+1;j++)
   {
           int i = static_cast <int> ((*buf_pointer)[j]);
           if(i<nlocal)
           {
             while(copylist2[actpos]==-1) actpos++;
               copylist[j-1]=nlocal+actpos;
             actpos++;
           }
   }
   cu_copylist->upload();
 
   cuda->shared_data.atom.nlocal=nlocal;
 
   // m is the buffer position after the device pack
   int m = Cuda_AtomVecChargeCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
 
   // append per-atom data of fixes listed in atom->extra_grow
   if (atom->nextra_grow)
   for(int j=0;j<nsend_atoms;j++)
   {
       int i=static_cast <int> ((*buf_pointer)[j+1]);
       int nextra=0;
       for (int iextra = 0; iextra < atom->nextra_grow; iextra++) {
 
         int dm = modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&((*buf_pointer)[m]));
         m+=dm;
                   nextra+=dm;
-                  if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,delflag);
+                  if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,1);
         // grow while keeping the data packed so far (flag=1)
         if(m>*maxsend)  grow_send(m,buf_pointer,1);
       }
       // slot j+1 is overwritten with the number of extra values of atom j
       (*buf_pointer)[j+1] = nextra;
   }
 
   (*buf_pointer)[0] = nsend_atoms;
   atom->nlocal-=nsend_atoms;
   cuda->shared_data.atom.update_nlocal=2;
 
   if(m==1) return 0;//m is at least 1 in cuda since buf[0] contains number of atoms
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    unpack one or two exchange messages stored back to back in buf
    buf[0] of each message is read as its number of packed atoms
    returns the total number of doubles consumed from buf
 ------------------------------------------------------------------------- */
 int AtomVecChargeCuda::unpack_exchange(double *buf)
 {
   // host-only mode: defer to the plain CPU implementation
   if(cuda->oncpu)
           return AtomVecCharge::unpack_exchange(buf);
 
   int dim=cuda->shared_data.exchange_dim;
   // re-run the device-side init whenever domain->box_change is set
   if(domain->box_change)
     Cuda_AtomVecChargeCuda_Init(&cuda->shared_data);
 
   int mfirst=0;
   // two messages are consumed when procgrid[dim] > 2, otherwise one
   for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++)
   {
     int nlocal = atom->nlocal;
     int nsend_atoms=static_cast<int> (buf[0]);
     if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
 
     // make room for the incoming atoms before the device unpack
     if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost);
     int naccept = Cuda_AtomVecChargeCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
     cu_copylist->download();
 
     // the fixed-size part is 1 + nsend_atoms*NCUDAEXCHANGE doubles; per-atom fix data follows
     int m = nsend_atoms*NCUDAEXCHANGE + 1;
     nlocal+=naccept;
     if (atom->nextra_grow)
       for(int j=0;j<nsend_atoms;j++)
       {
         if(copylist[j]>-1)
         {
           // copylist[j] is used as the local index this atom was stored at
           for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
             m += modify->fix[atom->extra_grow[iextra]]->
               unpack_exchange(copylist[j],&buf[m]);
         }
         else
         {
           // atom not stored: skip the buf[j+1] doubles of extra data it carries
           m+=static_cast <int> (buf[j+1]);
         }
       }
 
     cuda->shared_data.atom.nlocal=nlocal;
     cuda->shared_data.atom.update_nlocal=2;
     atom->nlocal=nlocal;
 
     // advance to the next message
     mfirst+=m;
     buf=&buf[m];
   }
   return mfirst;
 }
diff --git a/src/USER-CUDA/atom_vec_full_cuda.cpp b/src/USER-CUDA/atom_vec_full_cuda.cpp
index 4aa4d1326..4b859290c 100644
--- a/src/USER-CUDA/atom_vec_full_cuda.cpp
+++ b/src/USER-CUDA/atom_vec_full_cuda.cpp
@@ -1,517 +1,517 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <cstdlib>
 #include <cstdio>
 #include <cstring>
 #include "atom_vec_full_cuda.h"
 #include "comm_cuda_cu.h"
 #include "atom_vec_full_cuda_cu.h"
 #include "atom.h"
 #include "domain.h"
 #include "modify.h"
 #include "fix.h"
 #include "memory.h"
 #include "error.h"
 #include "cuda.h"
 #include "universe.h"
 #include "comm.h"
 
 using namespace LAMMPS_NS;
 
 #define DELTA 10000
 #define BUFFACTOR 1.5
 #define BUFEXTRA 1000
 #define NCUDAEXCHANGE 13 //nextra x y z vx vy vz tag type mask image q molecule
 
 #define BUF_FLOAT double
 /* ---------------------------------------------------------------------- */
 
-AtomVecFullCuda::AtomVecFullCuda(LAMMPS *lmp, int narg, char **arg) :
-  AtomVecFull(lmp, narg, arg)
+AtomVecFullCuda::AtomVecFullCuda(LAMMPS *lmp) :
+  AtomVecFull(lmp)
 {
    // a /cuda atom style requires the Cuda object owned by the LAMMPS instance
    cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
    // maxsend is dereferenced elsewhere in this class (*maxsend), so this sets a pointer to null
    maxsend=0;
    cudable=true;
    // the device-side init has not been run yet
    cuda_init_done=false;
    // copy lists start out empty; grow_copylist() allocates them
    max_nsend=0;
    cu_copylist=NULL;
    copylist=NULL;
    copylist2=NULL;
 }
 
 // Discard the current copy lists and allocate fresh ones holding
 // new_max_nsend ints each; previous contents are not preserved.
 void AtomVecFullCuda::grow_copylist(int new_max_nsend)
 {
   max_nsend = new_max_nsend;
 
   // drop the old device wrapper and both host lists
   delete cu_copylist;
   delete [] copylist2;
   if(copylist != NULL)
   {
     CudaWrapper_FreePinnedHostData((void*) copylist);
   }
 
   // copylist lives in pinned host memory and is wrapped by cu_copylist
   void* pinned = CudaWrapper_AllocPinnedHostData(max_nsend*sizeof(int),false);
   copylist = (int*) pinned;
   copylist2 = new int[max_nsend];
   cu_copylist = new cCudaData<int, int, xx > (copylist, max_nsend);
 }
 
 void AtomVecFullCuda::grow_send(int n,double** buf_send,int flag)  //need to be able to grow the comm send_buffer since the array sahll be copied from the gpu in whole
 {
   int old_maxsend=*maxsend+BUFEXTRA;
   *maxsend = static_cast<int> (BUFFACTOR * n);
   if (flag)
   {
     if(cuda->pinned)
     {
       double* tmp = new double[old_maxsend];
       memcpy((void*) tmp,(void*) *buf_send,old_maxsend*sizeof(double));
       if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
       *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
       memcpy(*buf_send,tmp,old_maxsend*sizeof(double));
       delete [] tmp;
     }
     else
     {
      *buf_send = (double *)
       memory->srealloc(*buf_send,(*maxsend+BUFEXTRA)*sizeof(double),
                        "comm:buf_send");
     }
   }
   else {
     if(cuda->pinned)
     {
       if(*buf_send) CudaWrapper_FreePinnedHostData((void*) (*buf_send));
       *buf_send = (double*) CudaWrapper_AllocPinnedHostData((*maxsend+BUFEXTRA)*sizeof(double),false);
     }
     else
     {
       memory->sfree(*buf_send);
       *buf_send = (double *) memory->smalloc((*maxsend+BUFEXTRA)*sizeof(double),
                                           "comm:buf_send");
     }
   }
 }
 
 // Grow the per-atom arrays through AtomVecFull::grow(n). Once the Cuda
 // setup has finished, special/nspecial are uploaded and all data is
 // downloaded before the grow, then resized/uploaded again afterwards.
 void AtomVecFullCuda::grow_both(int n)
 {
   if(cuda->finished_setup)
   {
     cuda->cu_special->upload();
     cuda->cu_nspecial->upload();
     cuda->downloadAll();
   }
 
   AtomVecFull::grow(n);
 
   if(not cuda->finished_setup) return;
   cuda->checkResize();
   cuda->uploadAll();
 }
 
 // Pack positions for forward communication. Usually this should not be
 // called since comm->communicate handles the communication if only
 // positions are exchanged.
 int AtomVecFullCuda::pack_comm(int n, int* iswap, double *buf,
                              int pbc_flag, int *pbc)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     int nwords = Cuda_CommCuda_PackComm(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
     // a nonzero count is rescaled by sizeof(X_FLOAT)/sizeof(double) when the types differ
     if(nwords && (sizeof(X_FLOAT)!=sizeof(double)))
       nwords=(nwords+1)*sizeof(X_FLOAT)/sizeof(double);
     return nwords;
   }
 
   // before setup has finished, or in host mode, use the CPU version
   return AtomVecFull::pack_comm(n,iswap,buf,pbc_flag,pbc);
 }
 
 // Pack positions and velocities for forward communication. Usually this
 // should not be called since comm->communicate handles the communication
 // if only positions are exchanged.
 int AtomVecFullCuda::pack_comm_vel(int n, int* iswap, double *buf,
                              int pbc_flag, int *pbc)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     int nwords = Cuda_CommCuda_PackCommVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
     // a nonzero count is rescaled by sizeof(X_FLOAT)/sizeof(double) when the types differ
     if(nwords && (sizeof(X_FLOAT)!=sizeof(double)))
       nwords=(nwords+1)*sizeof(X_FLOAT)/sizeof(double);
     return nwords;
   }
 
   // before setup has finished, or in host mode, use the CPU version
   return AtomVecFull::pack_comm_vel(n,iswap,buf,pbc_flag,pbc);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Unpack forward-communication data. Usually this should not be called
 // since comm->communicate handles the communication if only positions
 // are exchanged.
 void AtomVecFullCuda::unpack_comm(int n, int first, double *buf)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     Cuda_CommCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf);
     return;
   }
 
   // CPU fallback
   AtomVecFull::unpack_comm(n,first,buf);
 }
 
 // Unpack forward-communication data including velocities. Usually this
 // should not be called since comm->communicate handles the communication
 // if only positions are exchanged.
 void AtomVecFullCuda::unpack_comm_vel(int n, int first, double *buf)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     Cuda_CommCuda_UnpackCommVel(&cuda->shared_data,n,first,(void*)buf);
     return;
   }
 
   // CPU fallback
   AtomVecFull::unpack_comm_vel(n,first,buf);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pack the forces of atoms first..first+n-1 into buf and return the number
 // of values written. Usually this should not be called since
 // comm->communicate handles the communication if only forces are exchanged.
 int AtomVecFullCuda::pack_reverse(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
     return AtomVecFull::pack_reverse(n,first,buf);
 
   // bring the force array to the host before reading it
   cuda->cu_f->download();
 
   int count = 0;
   const int stop = first + n;
   for (int iatom = first; iatom < stop; iatom++)
     for (int d = 0; d < 3; d++)
       buf[count++] = f[iatom][d];
 
   cuda->cu_f->upload();
   return count;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Add the force contributions stored in buf onto the atoms named in list.
 // Usually this should not be called since comm->communicate handles the
 // communication if only forces are exchanged.
 void AtomVecFullCuda::unpack_reverse(int n, int *list, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
   {
     AtomVecFull::unpack_reverse(n,list,buf);
     return;
   }
 
   int pos = 0;
   // accumulate on the host copy of f, then push it back
   cuda->cu_f->download();
   for (int k = 0; k < n; k++) {
     const int target = list[k];
     for (int d = 0; d < 3; d++)
       f[target][d] += buf[pos++];
   }
   cuda->cu_f->upload();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pack border atoms; returns the count reported by the Cuda routine, or
 // by the CPU version when setup is unfinished or host mode is active.
 int AtomVecFullCuda::pack_border(int n, int *iswap, double *buf,
                                int pbc_flag, int *pbc)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     int packed = Cuda_AtomVecFullCuda_PackBorder(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
     return packed;
   }
 
   return AtomVecFull::pack_border(n,iswap,buf,pbc_flag,pbc);
 }
 
 // Pack border atoms including velocities; returns the count reported by
 // the Cuda routine, or by the CPU version when setup is unfinished or
 // host mode is active.
 int AtomVecFullCuda::pack_border_vel(int n, int *iswap, double *buf,
                                int pbc_flag, int *pbc)
 {
   if(cuda->finished_setup && not cuda->oncpu)
   {
     int packed = Cuda_AtomVecFullCuda_PackBorderVel(&cuda->shared_data,n,*iswap,(void*) buf,pbc,pbc_flag);
     return packed;
   }
 
   return AtomVecFull::pack_border_vel(n,iswap,buf,pbc_flag,pbc);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Unpack n border atoms starting at index first.
 void AtomVecFullCuda::unpack_border(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
   {
     AtomVecFull::unpack_border(n,first,buf);
     return;
   }
 
   // ensure there is enough space on device to unpack data
   for(;;)
   {
     if(atom->nghost+atom->nlocal+n<cuda->shared_data.atom.nmax) break;
     grow_both(0);
   }
 
   const int failed=Cuda_AtomVecFullCuda_UnpackBorder(&cuda->shared_data,n,first,(void*)buf);
   if(failed)
   {
     printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
   }
 }
 
 // Unpack n border atoms including velocities, starting at index first.
 void AtomVecFullCuda::unpack_border_vel(int n, int first, double *buf)
 {
   if(not cuda->finished_setup || cuda->oncpu)
   {
     AtomVecFull::unpack_border_vel(n,first,buf);
     return;
   }
 
   // ensure there is enough space on device to unpack data
   for(;;)
   {
     if(atom->nghost+atom->nlocal+n<cuda->shared_data.atom.nmax) break;
     grow_both(0);
   }
 
   const int failed=Cuda_AtomVecFullCuda_UnpackBorderVel(&cuda->shared_data,n,first,(void*)buf);
   if(failed)
   {
     printf(" # CUDA: Error: Failed to unpack Border atoms (This might be a bug).\n");
   }
 }
 
 /* ----------------------------------------------------------------------
    pack data for atom I for sending to another proc
    xyz must be 1st 3 values, so comm::exchange() can test on them
 ------------------------------------------------------------------------- */
 
 
 int AtomVecFullCuda::pack_exchange(int dim, double *buf)
 {
   // Packs all atoms selected for exchange along dimension dim in one pass.
   // Returns the number of doubles packed, or 0 when only the count slot was written.
   // In the non-oncpu path buf is reinterpreted as double** (see the cast below).
   // NOTE(review): presumably the caller passes the address of its send-buffer
   // pointer so the buffer can be reallocated here - confirm against the caller.
   if(cuda->oncpu)
           return AtomVecFull::pack_exchange(dim,buf);
 
   // run the device-side init once, and again whenever domain->box_change is set
   if(not cuda_init_done||domain->box_change)
   {
           Cuda_AtomVecFullCuda_Init(&cuda->shared_data);
           cuda_init_done=true;
   }
   double** buf_pointer=(double**) buf;
   // (re)allocate the send buffer if it is missing or smaller than nghost
   if(*maxsend<atom->nghost || *buf_pointer==NULL)
   {
           grow_send(atom->nghost>*maxsend?atom->nghost:*maxsend,buf_pointer,0);
           *maxsend=atom->nghost>*maxsend?atom->nghost:*maxsend;
   }
 
   if(max_nsend==0) grow_copylist(200);
 
   // returns the number of atoms to send; their indices are read back
   // from (*buf_pointer)[1..nsend_atoms] below
   int nsend_atoms = Cuda_AtomVecFullCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
 
   if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
   // buffer too small for the fixed-size part: grow it and rebuild the list
   if(nsend_atoms*NCUDAEXCHANGE>*maxsend)
   {
           grow_send((int) (nsend_atoms+100)*NCUDAEXCHANGE,buf_pointer,0);
           Cuda_AtomVecFullCuda_PackExchangeList(&cuda->shared_data,*maxsend,dim,*buf_pointer);
   }
 
   // nlocal after the leaving atoms are removed
   int nlocal=atom->nlocal-nsend_atoms;
 
   // copylist2[k] == -1 marks slot nlocal+k (in the last nsend_atoms slots)
   // as holding an atom that is itself leaving
   for(int i=0;i<nsend_atoms;i++) copylist2[i]=1;
   for(int j=1;j<nsend_atoms+1;j++)
   {
           int i = static_cast <int> ((*buf_pointer)[j]);
           if(i>=nlocal) copylist2[i-nlocal]=-1;
   }
 
   // for every leaving atom with index below the new nlocal, record in copylist
   // the next slot >= nlocal that is not marked -1; copylist entries for atoms
   // with i >= nlocal are not written here
   int actpos=0;
   for(int j=1;j<nsend_atoms+1;j++)
   {
           int i = static_cast <int> ((*buf_pointer)[j]);
           if(i<nlocal)
           {
             while(copylist2[actpos]==-1) actpos++;
               copylist[j-1]=nlocal+actpos;
             actpos++;
           }
   }
   cu_copylist->upload();
 
   cuda->shared_data.atom.nlocal=nlocal;
 
   // m is the buffer position after the device pack
   int m = Cuda_AtomVecFullCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
 
   // time the host-side packing that follows
   timespec time1,time2;
   clock_gettime(CLOCK_REALTIME,&time1);
 
   // buf_p caches *buf_pointer and is refreshed after every grow_send
   double* buf_p=*buf_pointer;
   for(int j=0;j<nsend_atoms;j++)
   {
     int i=static_cast <int> (buf_p[j+1]);
     // nextra counts the variable-length values appended for atom i
     int nextra=0;
     int k;
     // bonds: count, then (type, atom) per bond
     buf_p[m++] = num_bond[i];
     for (k = 0; k < num_bond[i]; k++) {
       buf_p[m++] = bond_type[i][k];
       buf_p[m++] = bond_atom[i][k];
     }
     nextra+=2*num_bond[i]+1;
     // the size check runs after the writes above have already happened
     if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
 
     // angles: count, then (type, atom1..atom3) per angle
     buf_p[m++] = num_angle[i];
     for (k = 0; k < num_angle[i]; k++) {
       buf_p[m++] = angle_type[i][k];
       buf_p[m++] = angle_atom1[i][k];
       buf_p[m++] = angle_atom2[i][k];
       buf_p[m++] = angle_atom3[i][k];
     }
     nextra+=4*num_angle[i]+1;
     if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
 
     // dihedrals: count, then (type, atom1..atom4) per dihedral
     buf_p[m++] = num_dihedral[i];
     for (k = 0; k < num_dihedral[i]; k++) {
       buf_p[m++] = dihedral_type[i][k];
       buf_p[m++] = dihedral_atom1[i][k];
       buf_p[m++] = dihedral_atom2[i][k];
       buf_p[m++] = dihedral_atom3[i][k];
       buf_p[m++] = dihedral_atom4[i][k];
     }
     nextra+=5*num_dihedral[i]+1;
     if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
 
     // impropers: count, then (type, atom1..atom4) per improper
     buf_p[m++] = num_improper[i];
     for (k = 0; k < num_improper[i]; k++) {
       buf_p[m++] = improper_type[i][k];
       buf_p[m++] = improper_atom1[i][k];
       buf_p[m++] = improper_atom2[i][k];
       buf_p[m++] = improper_atom3[i][k];
       buf_p[m++] = improper_atom4[i][k];
     }
     nextra+=5*num_improper[i]+1;
     if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
 
     // special list: the three nspecial counters, then nspecial[i][2] entries
     buf_p[m++] = nspecial[i][0];
     buf_p[m++] = nspecial[i][1];
     buf_p[m++] = nspecial[i][2];
     for (k = 0; k < nspecial[i][2]; k++) buf_p[m++] = special[i][k];
     nextra+=nspecial[i][2]+3;
     if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
 
     // per-atom data of fixes listed in atom->extra_grow
     if (atom->nextra_grow)
       for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
       {
         int dm= modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf_p[m]);
         m+=dm;
                   nextra+=dm;
-                  if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,delflag);
+                  if(i<nlocal)modify->fix[atom->extra_grow[iextra]]->copy_arrays(copylist[j],i,1);
         if(m>*maxsend) {grow_send(m,buf_pointer,1); buf_p=*buf_pointer;}
       }
 
     // for a leaving atom below the new nlocal, copy atom copylist[j] onto index i
     if(i<nlocal)AtomVecFull::copy(copylist[j],i,1);
     // slot j+1 is overwritten with the number of extra values of atom j
     (*buf_pointer)[j+1] = nextra;
   }
 
           // add the elapsed host packing time (seconds) to the timing counter
           clock_gettime(CLOCK_REALTIME,&time2);
           cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
 
   (*buf_pointer)[0] = nsend_atoms;
   atom->nlocal-=nsend_atoms;
   cuda->shared_data.atom.update_nlocal=2;
  //printf("End Pack Exchange\n");
   // m == 1 means nothing beyond the count slot was packed
   if(m==1) return 0;
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    unpack one or two exchange messages stored back to back in buf
    buf[0] of each message is read as its number of packed atoms
    the per-atom topology data (bonds, angles, dihedrals, impropers,
    special list) and fix data are unpacked on the host
    returns the total number of doubles consumed from buf
 ------------------------------------------------------------------------- */
 int AtomVecFullCuda::unpack_exchange(double *buf)
 {
   // host-only mode: defer to the plain CPU implementation
   if(cuda->oncpu)
           return AtomVecFull::unpack_exchange(buf);
 
   int dim=cuda->shared_data.exchange_dim;
   // re-run the device-side init whenever domain->box_change is set
   if(domain->box_change)
     Cuda_AtomVecFullCuda_Init(&cuda->shared_data);
 
   int mfirst=0;
   // two messages are consumed when procgrid[dim] > 2, otherwise one
   for(int pi=0;pi<(comm->procgrid[dim]>2?2:1);pi++)
   {
     int nlocal = atom->nlocal;
     int nsend_atoms=static_cast<int> (buf[0]);
     if(nsend_atoms>max_nsend) grow_copylist(nsend_atoms+100);
 
     // ensure there is enough space on device to unpack data
     if (nlocal+nsend_atoms+atom->nghost>=atom->nmax) grow_both(nlocal+nsend_atoms*2+atom->nghost);
     int naccept = Cuda_AtomVecFullCuda_UnpackExchange(&cuda->shared_data,nsend_atoms,buf,cu_copylist->dev_data());
     cu_copylist->download();
 
     // the fixed-size part is 1 + nsend_atoms*NCUDAEXCHANGE doubles; variable data follows
     int m = nsend_atoms*NCUDAEXCHANGE + 1;
     nlocal+=naccept;
 
     // time the host-side unpacking that follows
     timespec time1,time2;
     clock_gettime(CLOCK_REALTIME,&time1);
 
     for(int j=0;j<nsend_atoms;j++)
     {
       if(copylist[j]>-1)
       {
         // copylist[j] is used as the local index this atom was stored at
         int k;
         int i=copylist[j];
 
         num_bond[i] = static_cast<int> (buf[m++]);
         for (k = 0; k < num_bond[i]; k++) {
           bond_type[i][k] = static_cast<int> (buf[m++]);
           bond_atom[i][k] = static_cast<int> (buf[m++]);
         }
 
         num_angle[i] = static_cast<int> (buf[m++]);
         for (k = 0; k < num_angle[i]; k++) {
           angle_type[i][k] = static_cast<int> (buf[m++]);
           angle_atom1[i][k] = static_cast<int> (buf[m++]);
           angle_atom2[i][k] = static_cast<int> (buf[m++]);
           angle_atom3[i][k] = static_cast<int> (buf[m++]);
         }
 
         num_dihedral[i] = static_cast<int> (buf[m++]);
         for (k = 0; k < num_dihedral[i]; k++) {
           dihedral_type[i][k] = static_cast<int> (buf[m++]);
           dihedral_atom1[i][k] = static_cast<int> (buf[m++]);
           dihedral_atom2[i][k] = static_cast<int> (buf[m++]);
           dihedral_atom3[i][k] = static_cast<int> (buf[m++]);
           dihedral_atom4[i][k] = static_cast<int> (buf[m++]);
         }
 
         num_improper[i] = static_cast<int> (buf[m++]);
         for (k = 0; k < num_improper[i]; k++) {
           improper_type[i][k] = static_cast<int> (buf[m++]);
           improper_atom1[i][k] = static_cast<int> (buf[m++]);
           improper_atom2[i][k] = static_cast<int> (buf[m++]);
           improper_atom3[i][k] = static_cast<int> (buf[m++]);
           improper_atom4[i][k] = static_cast<int> (buf[m++]);
         }
 
         nspecial[i][0] = static_cast<int> (buf[m++]);
         nspecial[i][1] = static_cast<int> (buf[m++]);
         nspecial[i][2] = static_cast<int> (buf[m++]);
         for (k = 0; k < nspecial[i][2]; k++)
           special[i][k] = static_cast<int> (buf[m++]);
 
         if (atom->nextra_grow)
           for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
             m += modify->fix[atom->extra_grow[iextra]]->
               unpack_exchange(i,&buf[m]);
       }
       else
       {
         // atom not stored: skip the buf[j+1] doubles of variable data it carries
         m+=static_cast <int> (buf[j+1]);
       }
     }
 
     // add the elapsed host time (seconds) to the timing counter
     clock_gettime(CLOCK_REALTIME,&time2);
     cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
       time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
 
     cuda->shared_data.atom.nlocal=nlocal;
     cuda->shared_data.atom.update_nlocal=2;
     atom->nlocal=nlocal;
 
     // advance to the next message
     mfirst+=m;
     buf=&buf[m];
   }
   return mfirst;
 }
diff --git a/src/USER-CUDA/atom_vec_full_cuda.h b/src/USER-CUDA/atom_vec_full_cuda.h
index 5ce96a203..958077744 100644
--- a/src/USER-CUDA/atom_vec_full_cuda.h
+++ b/src/USER-CUDA/atom_vec_full_cuda.h
@@ -1,69 +1,69 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #ifdef ATOM_CLASS
 
 AtomStyle(full/cuda,AtomVecFullCuda)
 
 #else
 
 #ifndef LMP_ATOM_VEC_FULL_CUDA_H
 #define LMP_ATOM_VEC_FULL_CUDA_H
 
 #include "atom_vec_full.h"
 #include "cuda_data.h"
 
 namespace LAMMPS_NS {
 
 // AtomVecFullCuda: subclass of AtomVecFull that declares its own grow_*,
 // pack_* and unpack_* methods and keeps a few private CUDA-related members.
 // Only declarations are visible in this header; the definitions live elsewhere.
 class AtomVecFullCuda : public AtomVecFull {
  public:
   // Constructor. This change replaces the (LAMMPS *, int, char **) form with
   // one that takes only the LAMMPS pointer.
-  AtomVecFullCuda(class LAMMPS *, int, char **);
+  AtomVecFullCuda(class LAMMPS *);
   // Inline destructor with an empty body.
   // NOTE(review): copylist, copylist2 and cu_copylist are raw pointers and
   // are not released here -- confirm where (or whether) they are freed.
   virtual ~AtomVecFullCuda() {}
   // Buffer-growth helpers taking a size n.
   // NOTE(review): which buffers each one resizes is not visible from this
   // header -- confirm in the implementation file.
   void grow_copylist(int n);
   void grow_send(int n,double** buf_send,int flag);
   void grow_both(int n);
   // Pack/unpack routines for comm, reverse, border and exchange data.
   // Presumably these redeclare the AtomVecFull methods of the same name --
   // verify the signatures against atom_vec_full.h.
   int pack_comm(int, int *, double *, int, int *);
   int pack_comm_vel(int, int *, double *, int, int *);
   void unpack_comm(int, int, double *);
   void unpack_comm_vel(int, int, double *);
   int pack_reverse(int, int, double *);
   void unpack_reverse(int, int *, double *);
   int pack_border(int, int *, double *, int, int *);
   int pack_border_vel(int, int *, double *, int, int *);
   void unpack_border(int, int, double *);
   void unpack_border_vel(int, int, double *);
   int pack_exchange(int, double *);
   int unpack_exchange(double *);
  private:
   // Pointer to a Cuda object (class declared elsewhere).
   class Cuda *cuda;
   // NOTE(review): looks like a one-time-initialisation flag -- confirm usage.
   bool cuda_init_done;
   // Two raw int arrays plus a cCudaData wrapper.
   // NOTE(review): presumably cu_copylist wraps one of the host copylists and
   // max_nsend tracks their capacity -- verify in the implementation file.
   int* copylist;
   int* copylist2;
   cCudaData<int, int, xx >* cu_copylist;
   int max_nsend;
 };
 
 }
 
 #endif
 #endif