diff --git a/bench/KEPLER/Makefile.cpu b/bench/KEPLER/Makefile.cpu
new file mode 100755
index 000000000..f27d7e930
--- /dev/null
+++ b/bench/KEPLER/Makefile.cpu
@@ -0,0 +1,108 @@
+# linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFT
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+
+CC =		icc
+CCFLAGS =	-O
+SHFLAGS =	-fPIC
+DEPFLAGS =	-M
+
+LINK =		icc
+LINKFLAGS =	-O
+LIB =           -lstdc++
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings, OPTIONAL
+# see possible settings in doc/Section_start.html#2_2 (step 4)
+
+LMP_INC =
+
+# MPI library, REQUIRED
+# see discussion in doc/Section_start.html#2_2 (step 5)
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
+MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
+MPI_LIB =	-lmpi
+
+# FFT library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 6)
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =
+FFT_PATH = 
+FFT_LIB =
+
+# JPEG and/or PNG library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 7)
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+# left empty: LMP_INC above sets neither define, so nothing needs -ljpeg
+JPG_INC =
+JPG_PATH =
+JPG_LIB =
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# no need to edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target: link $(OBJ) into $(EXE), then print its section sizes
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $@
+	$(SIZE) $@
+
+# Library targets: lib = static archive, shlib = shared object, both written to $(EXE)
+.PHONY: lib shlib
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules: object files, and .d dependency lists via $(DEPFLAGS)
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+# Individual dependencies: one .d per object; sinclude ignores missing files
+
+DEPENDS = $(OBJ:.o=.d)
+sinclude $(DEPENDS)
diff --git a/bench/KEPLER/Makefile.cuda b/bench/KEPLER/Makefile.cuda
new file mode 100755
index 000000000..f27d7e930
--- /dev/null
+++ b/bench/KEPLER/Makefile.cuda
@@ -0,0 +1,108 @@
+# linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFT
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+
+CC =		icc
+CCFLAGS =	-O
+SHFLAGS =	-fPIC
+DEPFLAGS =	-M
+
+LINK =		icc
+LINKFLAGS =	-O
+LIB =           -lstdc++
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings, OPTIONAL
+# see possible settings in doc/Section_start.html#2_2 (step 4)
+
+LMP_INC =
+
+# MPI library, REQUIRED
+# see discussion in doc/Section_start.html#2_2 (step 5)
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
+MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
+MPI_LIB =	-lmpi
+
+# FFT library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 6)
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =
+FFT_PATH = 
+FFT_LIB =
+
+# JPEG and/or PNG library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 7)
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+# left empty: LMP_INC above sets neither define, so nothing needs -ljpeg
+JPG_INC =
+JPG_PATH =
+JPG_LIB =
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# no need to edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target: link $(OBJ) into $(EXE), then print its section sizes
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $@
+	$(SIZE) $@
+
+# Library targets: lib = static archive, shlib = shared object, both written to $(EXE)
+.PHONY: lib shlib
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules: object files, and .d dependency lists via $(DEPFLAGS)
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+# Individual dependencies: one .d per object; sinclude ignores missing files
+
+DEPENDS = $(OBJ:.o=.d)
+sinclude $(DEPENDS)
diff --git a/bench/KEPLER/Makefile.gpu b/bench/KEPLER/Makefile.gpu
new file mode 100755
index 000000000..f27d7e930
--- /dev/null
+++ b/bench/KEPLER/Makefile.gpu
@@ -0,0 +1,108 @@
+# linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFT
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+
+CC =		icc
+CCFLAGS =	-O
+SHFLAGS =	-fPIC
+DEPFLAGS =	-M
+
+LINK =		icc
+LINKFLAGS =	-O
+LIB =           -lstdc++
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings, OPTIONAL
+# see possible settings in doc/Section_start.html#2_2 (step 4)
+
+LMP_INC =
+
+# MPI library, REQUIRED
+# see discussion in doc/Section_start.html#2_2 (step 5)
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
+MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
+MPI_LIB =	-lmpi
+
+# FFT library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 6)
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =
+FFT_PATH = 
+FFT_LIB =
+
+# JPEG and/or PNG library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 7)
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+# left empty: LMP_INC above sets neither define, so nothing needs -ljpeg
+JPG_INC =
+JPG_PATH =
+JPG_LIB =
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# no need to edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target: link $(OBJ) into $(EXE), then print its section sizes
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $@
+	$(SIZE) $@
+
+# Library targets: lib = static archive, shlib = shared object, both written to $(EXE)
+.PHONY: lib shlib
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules: object files, and .d dependency lists via $(DEPFLAGS)
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+# Individual dependencies: one .d per object; sinclude ignores missing files
+
+DEPENDS = $(OBJ:.o=.d)
+sinclude $(DEPENDS)
diff --git a/bench/KEPLER/Makefile.gpu.double b/bench/KEPLER/Makefile.gpu.double
new file mode 100644
index 000000000..19dae5544
--- /dev/null
+++ b/bench/KEPLER/Makefile.gpu.double
@@ -0,0 +1,50 @@
+# /* ----------------------------------------------------------------------
+#  Linux Makefile for CUDA 6.0: Kepler (sm_35), double precision (-D_DOUBLE_DOUBLE)
+#     - Change CUDA_ARCH for your GPU
+# ------------------------------------------------------------------------- */
+
+# which file will be copied to Makefile.lammps
+
+EXTRAMAKE = Makefile.lammps.standard
+# CUDA toolkit root; nvcc itself is looked up on PATH (NOTE(review): confirm it matches CUDA_HOME)
+CUDA_HOME = /home/projects/cuda/6.0.37
+NVCC = nvcc
+
+# Kepler GPUs (compute capability 3.5)
+CUDA_ARCH = -arch=sm_35
+# Fermi GPUs (compute capability 2.1)
+#CUDA_ARCH = -arch=sm_21
+# pre-Fermi GPUs (compute capability 1.3)
+#CUDA_ARCH = -arch=sm_13
+# oldest CUDA GPUs (compute capability 1.0)
+#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
+
+# this setting should match LAMMPS Makefile
+# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
+
+LMP_INC = -DLAMMPS_SMALLBIG
+
+# precision for GPU calculations
+# -D_SINGLE_SINGLE  # Single precision for all calculations
+# -D_DOUBLE_DOUBLE  # Double precision for all calculations
+# -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double
+
+CUDA_PRECISION = -D_DOUBLE_DOUBLE
+# CUDA headers and libraries are taken from CUDA_HOME; -Xptxas -v makes ptxas verbose
+CUDA_INCLUDE = -I$(CUDA_HOME)/include
+CUDA_LIB = -L$(CUDA_HOME)/lib64
+CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
+# C++ compiler command (mpic++ MPI wrapper) and its flags; extra flags are kept commented out
+CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
+CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
+# output directories and tools; presumably consumed by Nvidia.makefile -- TODO confirm
+BIN_DIR = ./
+OBJ_DIR = ./
+LIB_DIR = ./
+AR = ar
+BSH = /bin/sh
+
+CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
+
+include Nvidia.makefile
+
diff --git a/bench/KEPLER/Makefile.gpu.mixed b/bench/KEPLER/Makefile.gpu.mixed
new file mode 100644
index 000000000..97ca9201c
--- /dev/null
+++ b/bench/KEPLER/Makefile.gpu.mixed
@@ -0,0 +1,50 @@
+# /* ----------------------------------------------------------------------
+#  Linux Makefile for CUDA 6.0: Kepler (sm_35), mixed precision (-D_SINGLE_DOUBLE)
+#     - Change CUDA_ARCH for your GPU
+# ------------------------------------------------------------------------- */
+
+# which file will be copied to Makefile.lammps
+
+EXTRAMAKE = Makefile.lammps.standard
+# CUDA toolkit root; nvcc itself is looked up on PATH (NOTE(review): confirm it matches CUDA_HOME)
+CUDA_HOME = /home/projects/cuda/6.0.37
+NVCC = nvcc
+
+# Kepler GPUs (compute capability 3.5)
+CUDA_ARCH = -arch=sm_35
+# Fermi GPUs (compute capability 2.1)
+#CUDA_ARCH = -arch=sm_21
+# pre-Fermi GPUs (compute capability 1.3)
+#CUDA_ARCH = -arch=sm_13
+# oldest CUDA GPUs (compute capability 1.0)
+#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
+
+# this setting should match LAMMPS Makefile
+# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
+
+LMP_INC = -DLAMMPS_SMALLBIG
+
+# precision for GPU calculations
+# -D_SINGLE_SINGLE  # Single precision for all calculations
+# -D_DOUBLE_DOUBLE  # Double precision for all calculations
+# -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double
+
+CUDA_PRECISION = -D_SINGLE_DOUBLE
+# CUDA headers and libraries are taken from CUDA_HOME; -Xptxas -v makes ptxas verbose
+CUDA_INCLUDE = -I$(CUDA_HOME)/include
+CUDA_LIB = -L$(CUDA_HOME)/lib64
+CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
+# C++ compiler command (mpic++ MPI wrapper) and its flags; extra flags are kept commented out
+CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
+CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
+# output directories and tools; presumably consumed by Nvidia.makefile -- TODO confirm
+BIN_DIR = ./
+OBJ_DIR = ./
+LIB_DIR = ./
+AR = ar
+BSH = /bin/sh
+
+CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
+
+include Nvidia.makefile
+
diff --git a/bench/KEPLER/Makefile.gpu.single b/bench/KEPLER/Makefile.gpu.single
new file mode 100644
index 000000000..f8005aaac
--- /dev/null
+++ b/bench/KEPLER/Makefile.gpu.single
@@ -0,0 +1,50 @@
+# /* ----------------------------------------------------------------------
+#  Linux Makefile for CUDA 6.0: Kepler (sm_35), single precision (-D_SINGLE_SINGLE)
+#     - Change CUDA_ARCH for your GPU
+# ------------------------------------------------------------------------- */
+
+# which file will be copied to Makefile.lammps
+
+EXTRAMAKE = Makefile.lammps.standard
+# CUDA toolkit root; nvcc itself is looked up on PATH (NOTE(review): confirm it matches CUDA_HOME)
+CUDA_HOME = /home/projects/cuda/6.0.37
+NVCC = nvcc
+
+# Kepler GPUs (compute capability 3.5)
+CUDA_ARCH = -arch=sm_35
+# Fermi GPUs (compute capability 2.1)
+#CUDA_ARCH = -arch=sm_21
+# pre-Fermi GPUs (compute capability 1.3)
+#CUDA_ARCH = -arch=sm_13
+# oldest CUDA GPUs (compute capability 1.0)
+#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
+
+# this setting should match LAMMPS Makefile
+# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
+
+LMP_INC = -DLAMMPS_SMALLBIG
+
+# precision for GPU calculations
+# -D_SINGLE_SINGLE  # Single precision for all calculations
+# -D_DOUBLE_DOUBLE  # Double precision for all calculations
+# -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double
+
+CUDA_PRECISION = -D_SINGLE_SINGLE
+# CUDA headers and libraries are taken from CUDA_HOME; -Xptxas -v makes ptxas verbose
+CUDA_INCLUDE = -I$(CUDA_HOME)/include
+CUDA_LIB = -L$(CUDA_HOME)/lib64
+CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
+# C++ compiler command (mpic++ MPI wrapper) and its flags; extra flags are kept commented out
+CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
+CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
+# output directories and tools; presumably consumed by Nvidia.makefile -- TODO confirm
+BIN_DIR = ./
+OBJ_DIR = ./
+LIB_DIR = ./
+AR = ar
+BSH = /bin/sh
+
+CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
+
+include Nvidia.makefile
+
diff --git a/bench/KEPLER/Makefile.intel.cpu b/bench/KEPLER/Makefile.intel.cpu
new file mode 100755
index 000000000..49a5f12b8
--- /dev/null
+++ b/bench/KEPLER/Makefile.intel.cpu
@@ -0,0 +1,109 @@
+# linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFT
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+
+CC =		icc
+CCFLAGS =	-O3 -openmp -DLAMMPS_MEMALIGN=64 -no-offload \
+                -xHost -fno-alias -ansi-alias -restrict -override-limits
+SHFLAGS =	-fPIC
+DEPFLAGS =	-M
+
+LINK =		icc
+LINKFLAGS =	-O -openmp
+LIB =           -lstdc++
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings, OPTIONAL
+# see possible settings in doc/Section_start.html#2_2 (step 4)
+
+LMP_INC =	-DLAMMPS_GZIP -DLAMMPS_JPEG
+
+# MPI library, REQUIRED
+# see discussion in doc/Section_start.html#2_2 (step 5)
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
+MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
+MPI_LIB =	-lmpi
+
+# FFT library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 6)
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =
+FFT_PATH = 
+FFT_LIB =
+
+# JPEG and/or PNG library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 7)
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+
+JPG_INC =       
+JPG_PATH = 	
+JPG_LIB =	-ljpeg
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# no need to edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target: link $(OBJ) into $(EXE), then print its section sizes
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $@
+	$(SIZE) $@
+
+# Library targets: lib = static archive, shlib = shared object, both written to $(EXE)
+.PHONY: lib shlib
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules: object files, and .d dependency lists via $(DEPFLAGS)
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+# Individual dependencies: one .d per object; sinclude ignores missing files
+
+DEPENDS = $(OBJ:.o=.d)
+sinclude $(DEPENDS)
diff --git a/bench/KEPLER/Makefile.kokkos.cuda b/bench/KEPLER/Makefile.kokkos.cuda
new file mode 100755
index 000000000..3f7569488
--- /dev/null
+++ b/bench/KEPLER/Makefile.kokkos.cuda
@@ -0,0 +1,113 @@
+# linux = Shannon Linux box, nvcc compiler + mpicxx linker, OpenMPI, KISS FFT
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+# nvcc does not accept a bare -fPIC, so it is forwarded to the host compiler with -Xcompiler
+CC =		nvcc
+CCFLAGS =	-O3 -arch=sm_35
+SHFLAGS =	-Xcompiler -fPIC
+DEPFLAGS =	-M
+
+LINK =		mpicxx
+LINKFLAGS =	-O
+LIB =           -lstdc++
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+# NOTE(review): OMP and CUDA are not referenced in this file; presumably read by the included package makefiles -- confirm
+OMP = yes
+CUDA = yes
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings, OPTIONAL
+# see possible settings in doc/Section_start.html#2_2 (step 4)
+
+LMP_INC =
+
+# MPI library, REQUIRED
+# see discussion in doc/Section_start.html#2_2 (step 5)
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
+MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
+MPI_LIB =	-lmpi
+
+# FFT library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 6)
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =
+FFT_PATH = 
+FFT_LIB =
+
+# JPEG and/or PNG library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 7)
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+# left empty: LMP_INC above sets neither define, so nothing needs -ljpeg
+JPG_INC =
+JPG_PATH =
+JPG_LIB =
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# no need to edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target: link $(OBJ) into $(EXE), then print its section sizes
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $@
+	$(SIZE) $@
+
+# Library targets: lib = static archive, shlib = shared object, both written to $(EXE)
+.PHONY: lib shlib
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules: .cu and .cpp sources are both compiled with $(CC)
+%.o:%.cu
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+# Individual dependencies: one .d per object; sinclude ignores missing files
+
+DEPENDS = $(OBJ:.o=.d)
+sinclude $(DEPENDS)
diff --git a/bench/KEPLER/Makefile.kokkos.omp b/bench/KEPLER/Makefile.kokkos.omp
new file mode 100755
index 000000000..61efa1ff6
--- /dev/null
+++ b/bench/KEPLER/Makefile.kokkos.omp
@@ -0,0 +1,110 @@
+# linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFT
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+
+CC =		icc
+CCFLAGS =	-O
+SHFLAGS =	-fPIC
+DEPFLAGS =	-M
+
+LINK =		icc
+LINKFLAGS =	-O
+LIB =           -lstdc++
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+
+OMP = yes
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings, OPTIONAL
+# see possible settings in doc/Section_start.html#2_2 (step 4)
+
+LMP_INC =
+
+# MPI library, REQUIRED
+# see discussion in doc/Section_start.html#2_2 (step 5)
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
+MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
+MPI_LIB =	-lmpi
+
+# FFT library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 6)
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =
+FFT_PATH = 
+FFT_LIB =
+
+# JPEG and/or PNG library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 7)
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+# left empty: LMP_INC above sets neither define, so nothing needs -ljpeg
+JPG_INC =
+JPG_PATH =
+JPG_LIB =
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# no need to edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target: link $(OBJ) into $(EXE), then print its section sizes
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $@
+	$(SIZE) $@
+
+# Library targets: lib = static archive, shlib = shared object, both written to $(EXE)
+.PHONY: lib shlib
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules: object files, and .d dependency lists via $(DEPFLAGS)
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+# Individual dependencies: one .d per object; sinclude ignores missing files
+
+DEPENDS = $(OBJ:.o=.d)
+sinclude $(DEPENDS)
diff --git a/bench/KEPLER/Makefile.omp b/bench/KEPLER/Makefile.omp
new file mode 100755
index 000000000..41c430f0f
--- /dev/null
+++ b/bench/KEPLER/Makefile.omp
@@ -0,0 +1,108 @@
+# linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFT
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+
+CC =		icc
+CCFLAGS =	-O3 -openmp -ansi-alias
+SHFLAGS =	-fPIC
+DEPFLAGS =	-M
+
+LINK =		icc
+LINKFLAGS =	-O -openmp
+LIB =           -lstdc++
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings, OPTIONAL
+# see possible settings in doc/Section_start.html#2_2 (step 4)
+
+LMP_INC =
+
+# MPI library, REQUIRED
+# see discussion in doc/Section_start.html#2_2 (step 5)
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
+MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
+MPI_LIB =	-lmpi
+
+# FFT library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 6)
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =
+FFT_PATH = 
+FFT_LIB =
+
+# JPEG and/or PNG library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 7)
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+# left empty: LMP_INC above sets neither define, so nothing needs -ljpeg
+JPG_INC =
+JPG_PATH =
+JPG_LIB =
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# no need to edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target: link $(OBJ) into $(EXE), then print its section sizes
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $@
+	$(SIZE) $@
+
+# Library targets: lib = static archive, shlib = shared object, both written to $(EXE)
+.PHONY: lib shlib
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules: object files, and .d dependency lists via $(DEPFLAGS)
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+# Individual dependencies: one .d per object; sinclude ignores missing files
+
+DEPENDS = $(OBJ:.o=.d)
+sinclude $(DEPENDS)
diff --git a/bench/KEPLER/Makefile.opt b/bench/KEPLER/Makefile.opt
new file mode 100755
index 000000000..f27d7e930
--- /dev/null
+++ b/bench/KEPLER/Makefile.opt
@@ -0,0 +1,108 @@
+# linux = Shannon Linux box, Intel icc, OpenMPI, KISS FFT
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+
+CC =		icc
+CCFLAGS =	-O
+SHFLAGS =	-fPIC
+DEPFLAGS =	-M
+
+LINK =		icc
+LINKFLAGS =	-O
+LIB =           -lstdc++
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings, OPTIONAL
+# see possible settings in doc/Section_start.html#2_2 (step 4)
+
+LMP_INC =
+
+# MPI library, REQUIRED
+# see discussion in doc/Section_start.html#2_2 (step 5)
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -I/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/include/
+MPI_PATH =      -L/home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37/lib
+MPI_LIB =	-lmpi
+
+# FFT library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 6)
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =
+FFT_PATH = 
+FFT_LIB =
+
+# JPEG and/or PNG library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 7)
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+# left empty: LMP_INC above sets neither define, so nothing needs -ljpeg
+JPG_INC =
+JPG_PATH =
+JPG_LIB =
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# no need to edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target: link $(OBJ) into $(EXE), then print its section sizes
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $@
+	$(SIZE) $@
+
+# Library targets: lib = static archive, shlib = shared object, both written to $(EXE)
+.PHONY: lib shlib
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules: object files, and .d dependency lists via $(DEPFLAGS)
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+# Individual dependencies: one .d per object; sinclude ignores missing files
+
+DEPENDS = $(OBJ:.o=.d)
+sinclude $(DEPENDS)
diff --git a/bench/KEPLER/README b/bench/KEPLER/README
new file mode 100644
index 000000000..5244aa891
--- /dev/null
+++ b/bench/KEPLER/README
@@ -0,0 +1,68 @@
+These are input scripts used to run versions of several of the
+benchmarks in the top-level bench directory using the GPU and
+USER-CUDA accelerator packages.  The results of running these scripts
+on two different machines (a desktop with 2 Tesla GPUs and the ORNL
+Titan supercomputer) are shown on the "GPU (Fermi)" section of the
+Benchmark page of the LAMMPS WWW site: lammps.sandia.gov/bench.
+
+Examples are shown below of how to run these scripts.  This assumes
+you have built 3 executables with both the GPU and USER-CUDA packages
+installed, e.g.
+
+lmp_linux_single
+lmp_linux_mixed
+lmp_linux_double
+
+The precision (single, mixed, double) refers to the GPU and USER-CUDA
+package precision.  See the README files in the lib/gpu and lib/cuda
+directories for instructions on how to build the packages with
+different precisions.  The doc/Section_accelerate.html file also has a
+summary description.
+
+------------------------------------------------------------------------
+
+If the script has "cpu" in its name, it is meant to be run in CPU-only
+mode (without using the GPU or USER-CUDA styles).  For example:
+
+mpirun -np 1 ../lmp_linux_double -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.cpu
+mpirun -np 12 ../lmp_linux_double -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.cpu
+
+The "xyz" settings determine the problem size.  The "t" setting
+determines the number of timesteps.
+
+------------------------------------------------------------------------
+
+If the script has "gpu" in its name, it is meant to be run using
+the GPU package.  For example:
+
+mpirun -np 12 ../lmp_linux_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+
+mpirun -np 8 ../lmp_linux_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+
+The "xyz" settings determine the problem size.  The "t" setting
+determines the number of timesteps.  The "np" setting determines how
+many MPI tasks per compute node the problem will run on, and the "g"
+setting determines how many GPUs per compute node the problem will run
+on, i.e. 1 or 2 in this case.  Note that you can use more MPI tasks
+than GPUs (both per compute node) with the GPU package.
+
+------------------------------------------------------------------------
+
+If the script has "cuda" in its name, it is meant to be run using
+the USER-CUDA package.  For example:
+
+mpirun -np 1 ../lmp_linux_single -c on -sf cuda -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.cuda
+
+mpirun -np 2 ../lmp_linux_double -c on -sf cuda -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.eam.cuda
+
+The "xyz" settings determine the problem size.  The "t" setting
+determines the number of timesteps.  The "np" setting determines how
+many MPI tasks per compute node the problem will run on, and the "g"
+setting determines how many GPUs per compute node the problem will run
+on, i.e. 1 or 2 in this case.  For the USER-CUDA package, the number
+of MPI tasks and GPUs (both per compute node) must be equal.
+
+------------------------------------------------------------------------
+
+If the script has "titan" in its name, it was run on the Titan supercomputer
+at ORNL.
diff --git a/bench/KEPLER/build.py b/bench/KEPLER/build.py
new file mode 100644
index 000000000..39b7617a0
--- /dev/null
+++ b/bench/KEPLER/build.py
@@ -0,0 +1,181 @@
+#!/usr/local/bin/python
+
+# Syntax: build.py target1 target2 ...
+#         targets:
+#         cpu, opt, omp,
+#         gpu/double, gpu/mixed, gpu/single,
+#         cuda/double, cuda/mixed, cuda/single,
+#         intel/cpu, intel/phi,
+#         kokkos/omp, kokkos/phi, kokkos/cuda
+#         gpu = gpu/double + gpu/mixed + gpu/single
+#         cuda = cuda/double + cuda/mixed + cuda/single
+#         intel = intel/cpu + intel/phi
+#         kokkos = kokkos/omp + kokkos/phi + kokkos/cuda
+#         all = cpu + opt + omp + gpu + cuda + intel + kokkos
+
+# create executables for different packages
+# MUST set lmpdir to path of LAMMPS home directory
+
+import sys,commands,os
+
+lmpdir = "~/lammps"
+
+# build LAMMPS
+# copy makefile into src/MAKE as Makefile.foo, then remove it
+
+def build_lammps(makefile,pkg):
+  # stage the given makefile as src/MAKE/Makefile.foo so that "make foo" can use it
+  print "Building LAMMPS with %s and %s packages ..." % (makefile,pkg)
+  commands.getoutput("cp %s %s/src/MAKE/Makefile.foo" % (makefile,lmpdir))
+  startdir = os.getcwd()
+  os.chdir(os.path.expanduser(lmpdir + "/src"))
+  # reset step: run "make clean-foo" then "make no-all", ignoring their output
+  for cmd in ("make clean-foo","make no-all"):
+    commands.getoutput(cmd)
+  # run "make yes-<name>" for each requested package and echo what make reports
+  for package in pkg:
+    print commands.getoutput("make yes-%s" % package)
+  # parallel build of the foo target; its output is not shown
+  commands.getoutput("make -j 16 foo")
+  # remove the staged makefile and go back to the directory we started in
+  os.remove("MAKE/Makefile.foo")
+  os.chdir(startdir)
+
+# build GPU library in LAMMPS
+# copy makefile into lib/gpu as Makefile.foo, then remove it
+  
+def build_gpu(makefile):
+  # stage the makefile as lib/gpu/Makefile.foo, clean, rebuild, then unstage it
+  print "Building GPU lib with %s ..." % makefile
+  commands.getoutput("cp %s %s/lib/gpu/Makefile.foo" % (makefile,lmpdir))
+  startdir = os.getcwd()
+  os.chdir(os.path.expanduser(lmpdir + "/lib/gpu"))
+  # clean first, then build in parallel; the output of both commands is dropped
+  for cmd in ("make -f Makefile.foo clean","make -j 16 -f Makefile.foo"):
+    commands.getoutput(cmd)
+  os.remove("Makefile.foo")
+  os.chdir(startdir)
+
+# build CUDA library in LAMMPS
+# set precision and arch explicitly as options to make in lib/cuda
+  
+def build_cuda(precision,arch):
+  # precision keyword -> value passed as "precision=" to make in lib/cuda
+  pflags = {"double": 2, "mixed": 4, "single": 1}
+  if precision not in pflags:
+    # reject a bad keyword before touching the build tree
+    raise ValueError("build_cuda: unknown precision '%s'" % precision)
+  print "Building USER-CUDA lib with %s and arch sm_%d ..." % (precision,arch)
+  cwd = os.getcwd()
+  os.chdir(os.path.expanduser(lmpdir + "/lib/cuda"))
+  commands.getoutput("make clean")
+  # arch is passed through as given (e.g. 35); make output is discarded
+  commands.getoutput("make -j 16 precision=%d arch=%s" % (pflags[precision],arch))
+  os.chdir(cwd)
+
+# main program
+# convert target keywords into target flags; "all" expands to every group keyword
+cpu = opt = omp = 0
+gpu_double = gpu_mixed = gpu_single = 0
+cuda_double = cuda_mixed = cuda_single = 0
+intel_cpu = intel_phi = 0
+kokkos_omp = kokkos_phi = kokkos_cuda = 0
+groups = ["cpu","opt","omp","gpu","cuda","intel","kokkos"]
+targets = sys.argv[1:]
+if "all" in targets: targets = [t for t in targets if t != "all"] + groups
+for target in targets:
+  if target == "cpu": cpu = 1
+  elif target == "opt": opt = 1
+  elif target == "omp": omp = 1
+  elif target == "gpu/double": gpu_double = 1
+  elif target == "gpu/mixed": gpu_mixed = 1
+  elif target == "gpu/single": gpu_single = 1
+  elif target == "gpu": gpu_double = gpu_mixed = gpu_single = 1
+  elif target == "cuda/double": cuda_double = 1
+  elif target == "cuda/mixed": cuda_mixed = 1
+  elif target == "cuda/single": cuda_single = 1
+  elif target == "cuda": cuda_double = cuda_mixed = cuda_single = 1
+  elif target == "intel/cpu": intel_cpu = 1
+  elif target == "intel/phi": intel_phi = 1
+  elif target == "intel": intel_cpu = intel_phi = 1
+  elif target == "kokkos/omp": kokkos_omp = 1
+  elif target == "kokkos/phi": kokkos_phi = 1
+  elif target == "kokkos/cuda": kokkos_cuda = 1
+  elif target == "kokkos": kokkos_omp = kokkos_phi = kokkos_cuda = 1
+  else: print "Target",target,"is unknown"
+
+# CPU: Makefile.cpu with an empty package list; executable is moved to ./lmp_cpu
+
+if cpu:
+  build_lammps(makefile = "Makefile.cpu", pkg = [])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_cpu" % lmpdir)
+
+# OPT: Makefile.opt with the opt package; executable is moved to ./lmp_opt
+
+if opt:
+  build_lammps(makefile = "Makefile.opt", pkg = ["opt"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_opt" % lmpdir)
+
+# OMP: Makefile.omp with the user-omp package; executable is moved to ./lmp_omp
+
+if omp:
+  build_lammps(makefile = "Makefile.omp", pkg = ["user-omp"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_omp" % lmpdir)
+
+# GPU, 3 precisions: rebuild lib/gpu with the per-precision makefile, then LAMMPS with the gpu package
+
+if gpu_double:
+  build_gpu(makefile = "Makefile.gpu.double")
+  build_lammps(makefile = "Makefile.gpu", pkg = ["gpu"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_gpu_double" % lmpdir)
+
+if gpu_mixed:
+  build_gpu(makefile = "Makefile.gpu.mixed")
+  build_lammps(makefile = "Makefile.gpu", pkg = ["gpu"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_gpu_mixed" % lmpdir)
+
+if gpu_single:
+  build_gpu(makefile = "Makefile.gpu.single")
+  build_lammps(makefile = "Makefile.gpu", pkg = ["gpu"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_gpu_single" % lmpdir)
+
+# CUDA, 3 precisions: rebuild lib/cuda with arch 35, then LAMMPS with the kspace and user-cuda packages
+
+if cuda_double:
+  build_cuda(precision = "double", arch = 35)
+  build_lammps(makefile = "Makefile.cuda", pkg = ["kspace","user-cuda"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_cuda_double" % lmpdir)
+
+if cuda_mixed:
+  build_cuda(precision = "mixed", arch = 35)
+  build_lammps(makefile = "Makefile.cuda", pkg = ["kspace","user-cuda"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_cuda_mixed" % lmpdir)
+
+if cuda_single:
+  build_cuda(precision = "single", arch = 35)
+  build_lammps(makefile = "Makefile.cuda", pkg = ["kspace","user-cuda"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_cuda_single" % lmpdir)
+
+# INTEL, CPU and Phi: one makefile per variant; the Phi build installs user-omp as well as user-intel
+
+if intel_cpu:
+  build_lammps(makefile = "Makefile.intel.cpu", pkg = ["user-intel"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_intel_cpu" % lmpdir)
+
+if intel_phi:
+  build_lammps(makefile = "Makefile.intel.phi", pkg = ["user-intel","user-omp"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_intel_phi" % lmpdir)
+
+# KOKKOS, all variants: same kokkos package each time, with a different makefile per variant
+
+if kokkos_omp:
+  build_lammps(makefile = "Makefile.kokkos.omp", pkg = ["kokkos"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_kokkos_omp" % lmpdir)
+
+if kokkos_phi:
+  build_lammps(makefile = "Makefile.kokkos.phi", pkg = ["kokkos"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_kokkos_phi" % lmpdir)
+
+if kokkos_cuda:
+  build_lammps(makefile = "Makefile.kokkos.cuda", pkg = ["kokkos"])
+  print commands.getoutput("mv %s/src/lmp_foo ./lmp_kokkos_cuda" % lmpdir)
diff --git a/bench/KEPLER/in.lj.cpu b/bench/KEPLER/in.lj.cpu
new file mode 100644
index 000000000..ab6988e28
--- /dev/null
+++ b/bench/KEPLER/in.lj.cpu
@@ -0,0 +1,22 @@
+# 3d Lennard-Jones melt; variables x, y, z and t are supplied with -v on the command line
+
+units		lj
+atom_style	atomic
+
+lattice		fcc 0.8442
+region		box block 0 $x 0 $y 0 $z  # box extent comes from variables x, y, z (problem size)
+create_box	1 box
+create_atoms	1 box
+mass		1 1.0
+
+velocity	all create 1.44 87287 loop geom
+
+pair_style	lj/cut 2.5
+pair_coeff	1 1 1.0 1.0 2.5
+
+neighbor	0.3 bin
+neigh_modify	delay 0 every 20 check no
+
+fix		1 all nve
+
+run		$t  # number of timesteps comes from variable t
diff --git a/bench/KEPLER/in.lj.cuda b/bench/KEPLER/in.lj.cuda
new file mode 100644
index 000000000..fcf6e37bc
--- /dev/null
+++ b/bench/KEPLER/in.lj.cuda
@@ -0,0 +1,26 @@
+# 3d Lennard-Jones melt; variables g, x, y, z and t are supplied with -v on the command line
+
+# set number of GPUs per node from variable g
+
+package         cuda gpu/node $g
+
+units		lj
+atom_style	atomic
+
+lattice		fcc 0.8442
+region		box block 0 $x 0 $y 0 $z  # box extent comes from variables x, y, z (problem size)
+create_box	1 box
+create_atoms	1 box
+mass		1 1.0
+
+velocity	all create 1.44 87287 loop geom
+
+pair_style	lj/cut 2.5
+pair_coeff	1 1 1.0 1.0 2.5
+
+neighbor	0.3 bin
+neigh_modify	delay 0 every 20 check no
+
+fix		1 all nve
+
+run		$t  # number of timesteps comes from variable t
diff --git a/bench/KEPLER/in.lj.gpu b/bench/KEPLER/in.lj.gpu
new file mode 100644
index 000000000..6cc283136
--- /dev/null
+++ b/bench/KEPLER/in.lj.gpu
@@ -0,0 +1,29 @@
+# 3d Lennard-Jones melt; variables g, x, y, z and t are supplied with -v on the command line
+
+# newton off is required for GPU package
+# set number of GPUs from variable g; only g = 1 and g = 2 select a package command
+
+newton          off
+if "$g == 1" then "package gpu force/neigh 0 0 1"
+if "$g == 2" then "package gpu force/neigh 0 1 1"
+
+units		lj
+atom_style	atomic
+
+lattice		fcc 0.8442
+region		box block 0 $x 0 $y 0 $z  # box extent comes from variables x, y, z (problem size)
+create_box	1 box
+create_atoms	1 box
+mass		1 1.0
+
+velocity	all create 1.44 87287 loop geom
+
+pair_style	lj/cut 2.5
+pair_coeff	1 1 1.0 1.0 2.5
+
+neighbor	0.3 bin
+neigh_modify	delay 0 every 20 check no
+
+fix		1 all nve
+
+run		$t  # number of timesteps comes from variable t
diff --git a/bench/KEPLER/in.lj.intel.cpu b/bench/KEPLER/in.lj.intel.cpu
new file mode 100644
index 000000000..f8634ead8
--- /dev/null
+++ b/bench/KEPLER/in.lj.intel.cpu
@@ -0,0 +1,26 @@
+# 3d Lennard-Jones melt; variables a, x, y, z and t are supplied with -v on the command line
+
+# set precision for USER-INTEL package (presumably via variable a; confirm against the package intel docs)
+
+package         intel 1 $a balance 0.0
+
+units		lj
+atom_style	atomic
+
+lattice		fcc 0.8442
+region		box block 0 $x 0 $y 0 $z  # box extent comes from variables x, y, z (problem size)
+create_box	1 box
+create_atoms	1 box
+mass		1 1.0
+
+velocity	all create 1.44 87287 loop geom
+
+pair_style	lj/cut 2.5
+pair_coeff	1 1 1.0 1.0 2.5
+
+neighbor	0.3 bin
+neigh_modify	delay 0 every 20 check no
+
+fix		1 all nve
+
+run		$t  # number of timesteps comes from variable t
diff --git a/bench/KEPLER/in.lj.kokkos.cuda b/bench/KEPLER/in.lj.kokkos.cuda
new file mode 100644
index 000000000..ae403e70c
--- /dev/null
+++ b/bench/KEPLER/in.lj.kokkos.cuda
@@ -0,0 +1,29 @@
+# 3d Lennard-Jones melt; variables c, x, y, z and t are supplied with -v on the command line
+
+# set neighlist style (fixed to full here) and comm mode (variable c, used for both exchange and forward)
+
+package         kokkos neigh full comm/exchange $c comm/forward $c
+newton          off
+
+units		lj
+atom_style	atomic
+
+lattice		fcc 0.8442
+region		box block 0 $x 0 $y 0 $z  # box extent comes from variables x, y, z (problem size)
+create_box	1 box
+create_atoms	1 box
+mass		1 1.0
+
+velocity	all create 1.44 87287 loop geom
+
+pair_style	lj/cut 2.5
+pair_coeff	1 1 1.0 1.0 2.5
+
+# set neighbor bins to cutoff + skin (2.5 + 0.3 = 2.8)
+
+neighbor	0.3 bin
+neigh_modify	delay 0 every 20 check no binsize 2.8
+
+fix		1 all nve
+
+run		$t  # number of timesteps comes from variable t
diff --git a/bench/KEPLER/in.lj.kokkos.omp b/bench/KEPLER/in.lj.kokkos.omp
new file mode 100644
index 000000000..34bfddcbd
--- /dev/null
+++ b/bench/KEPLER/in.lj.kokkos.omp
@@ -0,0 +1,26 @@
+# 3d Lennard-Jones melt; variables n, c, x, y, z and t are supplied with -v on the command line
+
+# set neighlist style (variable n) and comm mode (variable c, used for both exchange and forward)
+
+package         kokkos neigh $n comm/exchange $c comm/forward $c
+
+units		lj
+atom_style	atomic
+
+lattice		fcc 0.8442
+region		box block 0 $x 0 $y 0 $z  # box extent comes from variables x, y, z (problem size)
+create_box	1 box
+create_atoms	1 box
+mass		1 1.0
+
+velocity	all create 1.44 87287 loop geom
+
+pair_style	lj/cut 2.5
+pair_coeff	1 1 1.0 1.0 2.5
+
+neighbor	0.3 bin
+neigh_modify	delay 0 every 20 check no
+
+fix		1 all nve
+
+run		$t  # number of timesteps comes from variable t
diff --git a/bench/KEPLER/in.lj.omp b/bench/KEPLER/in.lj.omp
new file mode 100644
index 000000000..3c80af3e3
--- /dev/null
+++ b/bench/KEPLER/in.lj.omp
@@ -0,0 +1,26 @@
+# 3d Lennard-Jones melt; variables h, x, y, z and t are supplied with -v on the command line
+
+# set number of threads per MPI task from variable h
+
+package         omp $h
+
+units		lj
+atom_style	atomic
+
+lattice		fcc 0.8442
+region		box block 0 $x 0 $y 0 $z  # box extent comes from variables x, y, z (problem size)
+create_box	1 box
+create_atoms	1 box
+mass		1 1.0
+
+velocity	all create 1.44 87287 loop geom
+
+pair_style	lj/cut 2.5
+pair_coeff	1 1 1.0 1.0 2.5
+
+neighbor	0.3 bin
+neigh_modify	delay 0 every 20 check no
+
+fix		1 all nve
+
+run		$t  # number of timesteps comes from variable t
diff --git a/bench/KEPLER/in.lj.opt b/bench/KEPLER/in.lj.opt
new file mode 100644
index 000000000..ab6988e28
--- /dev/null
+++ b/bench/KEPLER/in.lj.opt
@@ -0,0 +1,22 @@
+# 3d Lennard-Jones melt; variables x, y, z and t are supplied with -v on the command line
+
+units		lj
+atom_style	atomic
+
+lattice		fcc 0.8442
+region		box block 0 $x 0 $y 0 $z  # box extent comes from variables x, y, z (problem size)
+create_box	1 box
+create_atoms	1 box
+mass		1 1.0
+
+velocity	all create 1.44 87287 loop geom
+
+pair_style	lj/cut 2.5
+pair_coeff	1 1 1.0 1.0 2.5
+
+neighbor	0.3 bin
+neigh_modify	delay 0 every 20 check no
+
+fix		1 all nve
+
+run		$t  # number of timesteps comes from variable t
diff --git a/bench/KEPLER/run_cpu.sh b/bench/KEPLER/run_cpu.sh
new file mode 100644
index 000000000..b08d606c9
--- /dev/null
+++ b/bench/KEPLER/run_cpu.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+#SBATCH -N 1 --time=12:00:00
+
+# LJ benchmark sweep with the lmp_cpu executable: for each mpirun -N value
+# from 1 to 16, run every problem size below for 100 timesteps and save the
+# resulting log.lammps as log.28Jun14.lj.cpu.<size label>.<N value>.
+
+# one entry per problem size: the x, y, z values passed with -v, then the log-name label
+sizes=(
+  "8 8 8 2K"
+  "8 8 16 4K"
+  "8 16 16 8K"
+  "16 16 16 16K"
+  "16 16 32 32K"
+  "16 32 32 64K"
+  "32 32 32 128K"
+  "32 32 64 256K"
+  "32 64 64 512K"
+  "64 64 64 1024K"
+  "64 64 128 2048K"
+  "64 128 128 4096K"
+  "128 128 128 8192K"
+)
+
+for np in {1..16}; do
+  for size in "${sizes[@]}"; do
+    read -r x y z label <<< "$size"
+    mpirun -N "$np" ./lmp_cpu -v x "$x" -v y "$y" -v z "$z" -v t 100 < in.lj.cpu
+    mv log.lammps "log.28Jun14.lj.cpu.${label}.${np}"
+  done
+done
diff --git a/bench/KEPLER/run_gpu.sh b/bench/KEPLER/run_gpu.sh
new file mode 100644
index 000000000..33faeccc6
--- /dev/null
+++ b/bench/KEPLER/run_gpu.sh
@@ -0,0 +1,1658 @@
+#!/bin/bash
+#SBATCH -N 1 --time=12:00:00
+
+mpirun -N 1 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 1, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.1  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 1 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.1
+mpirun -N 1 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.1
+mpirun -N 1 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.1
+mpirun -N 1 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.1
+mpirun -N 1 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.1
+mpirun -N 1 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.1
+mpirun -N 1 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.1
+mpirun -N 1 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.1
+mpirun -N 1 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.1
+mpirun -N 1 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.1
+
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 2, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.2  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.2
+
+mpirun -N 3 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 3, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.3  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 3 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.3
+mpirun -N 3 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.3
+mpirun -N 3 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.3
+mpirun -N 3 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.3
+mpirun -N 3 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.3
+mpirun -N 3 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.3
+mpirun -N 3 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.3
+mpirun -N 3 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.3
+mpirun -N 3 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.3
+mpirun -N 3 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.3
+
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 4, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.4  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.4
+
+mpirun -N 5 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 5, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.5  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 5 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.5
+mpirun -N 5 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.5
+mpirun -N 5 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.5
+mpirun -N 5 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.5
+mpirun -N 5 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.5
+mpirun -N 5 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.5
+mpirun -N 5 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.5
+mpirun -N 5 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.5
+mpirun -N 5 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.5
+mpirun -N 5 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.5
+
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 6, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.6  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.6
+
+mpirun -N 7 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 7, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.7  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 7 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.7
+mpirun -N 7 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.7
+mpirun -N 7 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.7
+mpirun -N 7 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.7
+mpirun -N 7 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.7
+mpirun -N 7 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.7
+mpirun -N 7 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.7
+mpirun -N 7 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.7
+mpirun -N 7 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.7
+mpirun -N 7 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.7
+
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 8, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.8  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.8
+
+mpirun -N 9 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 9, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.9  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 9 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.9
+mpirun -N 9 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.9
+mpirun -N 9 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.9
+mpirun -N 9 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.9
+mpirun -N 9 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.9
+mpirun -N 9 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.9
+mpirun -N 9 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.9
+mpirun -N 9 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.9
+mpirun -N 9 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.9
+mpirun -N 9 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.9
+
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 10, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.10  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.10
+
+mpirun -N 11 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 11, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.11  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 11 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.11
+mpirun -N 11 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.11
+mpirun -N 11 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.11
+mpirun -N 11 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.11
+mpirun -N 11 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.11
+mpirun -N 11 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.11
+mpirun -N 11 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.11
+mpirun -N 11 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.11
+mpirun -N 11 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.11
+mpirun -N 11 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.11
+
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 12, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.12  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.12
+
+mpirun -N 13 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 13, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.13  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 13 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.13
+mpirun -N 13 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.13
+mpirun -N 13 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.13
+mpirun -N 13 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.13
+mpirun -N 13 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.13
+mpirun -N 13 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.13
+mpirun -N 13 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.13
+mpirun -N 13 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.13
+mpirun -N 13 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.13
+mpirun -N 13 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.13
+
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 14, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.14  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.14
+
+mpirun -N 15 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 15, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.15  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 15 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.15
+mpirun -N 15 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.15
+mpirun -N 15 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.15
+mpirun -N 15 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.15
+mpirun -N 15 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.15
+mpirun -N 15 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.15
+mpirun -N 15 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.15
+mpirun -N 15 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.15
+mpirun -N 15 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.15
+mpirun -N 15 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.15
+
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 16, g=1 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.1.16  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.1.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.1.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.1.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.1.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.1.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.1.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.1.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.1.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.1.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.1.16
+
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 2, g=2 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.2.2  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.2.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.2.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.2.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.2.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.2.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.2.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.2.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.2.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.2.2
+mpirun -N 2 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.2.2
+
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 4, g=2 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.2.4  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.2.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.2.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.2.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.2.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.2.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.2.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.2.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.2.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.2.4
+mpirun -N 4 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.2.4
+
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 6, g=2 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.2.6  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.2.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.2.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.2.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.2.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.2.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.2.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.2.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.2.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.2.6
+mpirun -N 6 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.2.6
+
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 8, g=2 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.2.8  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.2.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.2.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.2.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.2.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.2.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.2.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.2.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.2.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.2.8
+mpirun -N 8 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.2.8
+
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # -N 10, g=2 sweep: x/y/z doubled in turn from 8x8x8 to 64x64x128
+mv log.lammps log.28Jun14.lj.gpu.double.2K.2.10  # keep each run's log; tags run 2K..2048K, suffix = g.N
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.2.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.2.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.2.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.2.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.2.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.2.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.2.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.2.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.2.10
+mpirun -N 10 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.2.10
+
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2K.2.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.2.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.2.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.2.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.2.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.2.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.2.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.2.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.2.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.2.12
+mpirun -N 12 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.2.12
+
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2K.2.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.2.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.2.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.2.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.2.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.2.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.2.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.2.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.2.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.2.14
+mpirun -N 14 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.2.14
+
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2K.2.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.4K.2.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.8K.2.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.16K.2.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.32K.2.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.64K.2.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.128K.2.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.256K.2.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.512K.2.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.1024K.2.16
+mpirun -N 16 ./lmp_gpu_double -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.double.2048K.2.16
+
+mpirun -N 1 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.1
+mpirun -N 1 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.1
+mpirun -N 1 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.1
+mpirun -N 1 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.1
+mpirun -N 1 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.1
+mpirun -N 1 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.1
+mpirun -N 1 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.1
+mpirun -N 1 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.1
+mpirun -N 1 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.1
+mpirun -N 1 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.1
+mpirun -N 1 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.1
+
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.2
+
+mpirun -N 3 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.3
+mpirun -N 3 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.3
+mpirun -N 3 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.3
+mpirun -N 3 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.3
+mpirun -N 3 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.3
+mpirun -N 3 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.3
+mpirun -N 3 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.3
+mpirun -N 3 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.3
+mpirun -N 3 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.3
+mpirun -N 3 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.3
+mpirun -N 3 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.3
+
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.4
+
+mpirun -N 5 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.5
+mpirun -N 5 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.5
+mpirun -N 5 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.5
+mpirun -N 5 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.5
+mpirun -N 5 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.5
+mpirun -N 5 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.5
+mpirun -N 5 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.5
+mpirun -N 5 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.5
+mpirun -N 5 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.5
+mpirun -N 5 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.5
+mpirun -N 5 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.5
+
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.6
+
+mpirun -N 7 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.7
+mpirun -N 7 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.7
+mpirun -N 7 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.7
+mpirun -N 7 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.7
+mpirun -N 7 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.7
+mpirun -N 7 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.7
+mpirun -N 7 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.7
+mpirun -N 7 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.7
+mpirun -N 7 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.7
+mpirun -N 7 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.7
+mpirun -N 7 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.7
+
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.8
+
+mpirun -N 9 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.9
+mpirun -N 9 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.9
+mpirun -N 9 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.9
+mpirun -N 9 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.9
+mpirun -N 9 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.9
+mpirun -N 9 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.9
+mpirun -N 9 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.9
+mpirun -N 9 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.9
+mpirun -N 9 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.9
+mpirun -N 9 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.9
+mpirun -N 9 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.9
+
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.10
+
+mpirun -N 11 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.11
+mpirun -N 11 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.11
+mpirun -N 11 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.11
+mpirun -N 11 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.11
+mpirun -N 11 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.11
+mpirun -N 11 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.11
+mpirun -N 11 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.11
+mpirun -N 11 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.11
+mpirun -N 11 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.11
+mpirun -N 11 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.11
+mpirun -N 11 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.11
+
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.12
+
+mpirun -N 13 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.13
+mpirun -N 13 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.13
+mpirun -N 13 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.13
+mpirun -N 13 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.13
+mpirun -N 13 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.13
+mpirun -N 13 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.13
+mpirun -N 13 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.13
+mpirun -N 13 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.13
+mpirun -N 13 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.13
+mpirun -N 13 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.13
+mpirun -N 13 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.13
+
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.14
+
+mpirun -N 15 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.15
+mpirun -N 15 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.15
+mpirun -N 15 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.15
+mpirun -N 15 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.15
+mpirun -N 15 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.15
+mpirun -N 15 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.15
+mpirun -N 15 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.15
+mpirun -N 15 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.15
+mpirun -N 15 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.15
+mpirun -N 15 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.15
+mpirun -N 15 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.15
+
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.1.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.1.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.1.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.1.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.1.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.1.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.1.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.1.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.1.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.1.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.1.16
+
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.2.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.2.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.2.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.2.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.2.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.2.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.2.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.2.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.2.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.2.2
+mpirun -N 2 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.2.2
+
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 4 and -v g 2: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.2.4  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.2.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.2.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.2.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.2.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.2.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.2.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.2.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.2.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.2.4
+mpirun -N 4 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.2.4
+
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 6 and -v g 2: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.2.6  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.2.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.2.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.2.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.2.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.2.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.2.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.2.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.2.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.2.6
+mpirun -N 6 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.2.6
+
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 8 and -v g 2: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.2.8  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.2.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.2.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.2.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.2.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.2.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.2.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.2.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.2.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.2.8
+mpirun -N 8 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.2.8
+
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 10 and -v g 2: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.2.10  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.2.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.2.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.2.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.2.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.2.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.2.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.2.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.2.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.2.10
+mpirun -N 10 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.2.10
+
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 12 and -v g 2: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.2.12  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.2.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.2.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.2.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.2.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.2.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.2.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.2.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.2.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.2.12
+mpirun -N 12 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.2.12
+
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 14 and -v g 2: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.2.14  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.2.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.2.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.2.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.2.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.2.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.2.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.2.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.2.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.2.14
+mpirun -N 14 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.2.14
+
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 16 and -v g 2: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.mixed.2K.2.16  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.4K.2.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.8K.2.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.16K.2.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.32K.2.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.64K.2.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.128K.2.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.256K.2.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.512K.2.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.1024K.2.16
+mpirun -N 16 ./lmp_gpu_mixed -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.mixed.2048K.2.16
+
+mpirun -N 1 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 1 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.1  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 1 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.1
+mpirun -N 1 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.1
+mpirun -N 1 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.1
+mpirun -N 1 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.1
+mpirun -N 1 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.1
+mpirun -N 1 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.1
+mpirun -N 1 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.1
+mpirun -N 1 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.1
+mpirun -N 1 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.1
+mpirun -N 1 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.1
+
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 2 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.2  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.2
+
+mpirun -N 3 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 3 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.3  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 3 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.3
+mpirun -N 3 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.3
+mpirun -N 3 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.3
+mpirun -N 3 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.3
+mpirun -N 3 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.3
+mpirun -N 3 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.3
+mpirun -N 3 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.3
+mpirun -N 3 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.3
+mpirun -N 3 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.3
+mpirun -N 3 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.3
+
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 4 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.4  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.4
+
+mpirun -N 5 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 5 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.5  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 5 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.5
+mpirun -N 5 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.5
+mpirun -N 5 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.5
+mpirun -N 5 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.5
+mpirun -N 5 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.5
+mpirun -N 5 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.5
+mpirun -N 5 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.5
+mpirun -N 5 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.5
+mpirun -N 5 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.5
+mpirun -N 5 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.5
+
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 6 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.6  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.6
+
+mpirun -N 7 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 7 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.7  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 7 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.7
+mpirun -N 7 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.7
+mpirun -N 7 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.7
+mpirun -N 7 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.7
+mpirun -N 7 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.7
+mpirun -N 7 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.7
+mpirun -N 7 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.7
+mpirun -N 7 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.7
+mpirun -N 7 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.7
+mpirun -N 7 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.7
+
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 8 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.8  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.8
+
+mpirun -N 9 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 9 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.9  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 9 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.9
+mpirun -N 9 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.9
+mpirun -N 9 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.9
+mpirun -N 9 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.9
+mpirun -N 9 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.9
+mpirun -N 9 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.9
+mpirun -N 9 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.9
+mpirun -N 9 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.9
+mpirun -N 9 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.9
+mpirun -N 9 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.9
+
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 10 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.10  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.10
+
+mpirun -N 11 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 11 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.11  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 11 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.11
+mpirun -N 11 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.11
+mpirun -N 11 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.11
+mpirun -N 11 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.11
+mpirun -N 11 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.11
+mpirun -N 11 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.11
+mpirun -N 11 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.11
+mpirun -N 11 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.11
+mpirun -N 11 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.11
+mpirun -N 11 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.11
+
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 12 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.12  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.12
+
+mpirun -N 13 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 13 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.13  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 13 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.13
+mpirun -N 13 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.13
+mpirun -N 13 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.13
+mpirun -N 13 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.13
+mpirun -N 13 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.13
+mpirun -N 13 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.13
+mpirun -N 13 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.13
+mpirun -N 13 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.13
+mpirun -N 13 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.13
+mpirun -N 13 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.13
+
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 14 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.14  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.14
+
+mpirun -N 15 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu  # sweep with mpirun -N 15 and -v g 1: 11 runs, x/y/z grow from 8x8x8 to 64x64x128, t fixed at 100
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.15  # save this run's log; name ends in <size label>.<g>.<N> (label presumably atom count; in.lj.gpu not shown)
+mpirun -N 15 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.15
+mpirun -N 15 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.15
+mpirun -N 15 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.15
+mpirun -N 15 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.15
+mpirun -N 15 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.15
+mpirun -N 15 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.15
+mpirun -N 15 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.15
+mpirun -N 15 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.15
+mpirun -N 15 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.15
+mpirun -N 15 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.15
+
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2K.1.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.1.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.1.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.1.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.1.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.1.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.1.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.1.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.1.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.1.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.1.16
+
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2K.2.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.2.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.2.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.2.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.2.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.2.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.2.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.2.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.2.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.2.2
+mpirun -N 2 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.2.2
+
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2K.2.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.2.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.2.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.2.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.2.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.2.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.2.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.2.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.2.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.2.4
+mpirun -N 4 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.2.4
+
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2K.2.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.2.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.2.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.2.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.2.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.2.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.2.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.2.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.2.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.2.6
+mpirun -N 6 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.2.6
+
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2K.2.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.2.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.2.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.2.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.2.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.2.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.2.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.2.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.2.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.2.8
+mpirun -N 8 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.2.8
+
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2K.2.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.2.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.2.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.2.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.2.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.2.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.2.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.2.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.2.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.2.10
+mpirun -N 10 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.2.10
+
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2K.2.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.2.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.2.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.2.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.2.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.2.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.2.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.2.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.2.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.2.12
+mpirun -N 12 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.2.12
+
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2K.2.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.2.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.2.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.2.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.2.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.2.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.2.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.2.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.2.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.2.14
+mpirun -N 14 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.2.14
+
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2K.2.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.4K.2.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.8K.2.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.16K.2.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.32K.2.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.64K.2.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.128K.2.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.256K.2.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.512K.2.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.1024K.2.16
+mpirun -N 16 ./lmp_gpu_single -sf gpu -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.gpu
+mv log.lammps log.28Jun14.lj.gpu.single.2048K.2.16
diff --git a/bench/KEPLER/run_intel_cpu.sh b/bench/KEPLER/run_intel_cpu.sh
new file mode 100644
index 000000000..0cc44a620
--- /dev/null
+++ b/bench/KEPLER/run_intel_cpu.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+#SBATCH -N 1 --time=12:00:00
+
+# Sweep of LAMMPS runs on 16 MPI ranks with the "intel" suffix (-sf intel),
+# all reading in.lj.intel.cpu.  Each run writes log.lammps, which is renamed
+# straight afterwards so the following run does not overwrite it.
+
+# Value handed to the input script as "-v a", paired with the tag that goes
+# into the log file name.  NOTE(review): the tags suggest a precision
+# setting chosen inside in.lj.intel.cpu -- confirm against that file.
+modes=("1 single" "3 mixed" "2 double")
+
+# Values for "-v x", "-v y" and "-v z", followed by the size tag used in the
+# log file name; listed in the order the runs are made.
+sizes=(
+  "8 8 8 2K"
+  "8 8 16 4K"
+  "8 16 16 8K"
+  "16 16 16 16K"
+  "16 16 32 32K"
+  "16 32 32 64K"
+  "32 32 32 128K"
+  "32 32 64 256K"
+  "32 64 64 512K"
+  "64 64 64 1024K"
+  "64 64 128 2048K"
+  "64 128 128 4096K"
+  "128 128 128 8192K"
+)
+
+for mode in "${modes[@]}"; do
+  read -r a precision <<< "$mode"
+  for size in "${sizes[@]}"; do
+    read -r x y z atoms <<< "$size"
+    mpirun -np 16 lmp_intel_cpu -sf intel -v a "$a" -v x "$x" -v y "$y" -v z "$z" -v t 100 < in.lj.intel.cpu
+    mv log.lammps "log.28Jun14.lj.intel.cpu.$precision.$atoms.16"
+  done
+done
diff --git a/bench/KEPLER/run_kokkos_cuda.sh b/bench/KEPLER/run_kokkos_cuda.sh
new file mode 100644
index 000000000..778cd9acf
--- /dev/null
+++ b/bench/KEPLER/run_kokkos_cuda.sh
@@ -0,0 +1,650 @@
+#!/bin/bash
+#SBATCH -N 1 --time=12:00:00
+
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.1
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 1 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.1
+
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.2
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 2 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.2
+
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.3
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 3 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.3
+
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.4
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 4 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.4
+
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.5
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 5 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.5
+ # -np 1 with -k on g 1 t 6: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.1.6 (SIZE = 2K through 8192K)
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.6
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 6 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.6
+ # -np 1 with -k on g 1 t 7: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.1.7 (SIZE = 2K through 8192K)
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.7
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 7 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.7
+ # -np 1 with -k on g 1 t 8: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.1.8 (SIZE = 2K through 8192K)
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.8
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 8 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.8
+ # -np 1 with -k on g 1 t 9: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.1.9 (SIZE = 2K through 8192K)
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.9
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 9 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.9
+ # -np 1 with -k on g 1 t 10: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.1.10 (SIZE = 2K through 8192K)
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.10
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 10 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.10
+ # -np 1 with -k on g 1 t 11: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.1.11 (SIZE = 2K through 8192K)
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.11
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 11 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.11
+ # -np 1 with -k on g 1 t 12: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.1.12 (SIZE = 2K through 8192K)
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.12
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 12 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.12
+ # -np 1 with -k on g 1 t 13: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.1.13 (SIZE = 2K through 8192K)
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.13
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 13 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.13
+ # -np 1 with -k on g 1 t 14: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.1.14 (SIZE = 2K through 8192K)
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.14
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 14 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.14
+ # -np 1 with -k on g 1 t 15: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.1.15 (SIZE = 2K through 8192K)
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.15
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 15 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.15
+ # -np 1 with -k on g 1 t 16: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.1.16 (SIZE = 2K through 8192K)
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.1.16
+mpirun -np 1 ./lmp_kokkos_cuda -k on g 1 t 16 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.1.16
+ # -np 2 with -k on g 2 t 1: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.2.1 (SIZE = 2K through 8192K)
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.2.1
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 1 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.2.1
+ # -np 2 with -k on g 2 t 2: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.2.2 (SIZE = 2K through 8192K)
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.2.2
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 2 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.2.2
+ # -np 2 with -k on g 2 t 3: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.2.3 (SIZE = 2K through 8192K)
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.2.3
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 3 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.2.3
+ # -np 2 with -k on g 2 t 4: 13 runs stepping -v x/y/z from 8x8x8 up to 128x128x128; after each run log.lammps is renamed to log.28Jun14.lj.kokkos.cuda.SIZE.2.4 (SIZE = 2K through 8192K)
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.16K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.32K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.64K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.128K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.256K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.512K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.1024K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.2048K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.4096K.2.4
+mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 4 -sf kk -v c device -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.kokkos.cuda
+mv log.lammps log.28Jun14.lj.kokkos.cuda.8192K.2.4
+
+# LJ size sweep for ./lmp_kokkos_cuda, launched with `mpirun -np 2` and
+# `-k on g 2 t 5 -sf kk`.
+# lj_kcuda_t5 X Y Z runs a single case: X, Y and Z are handed to the binary as
+# `-v x`, `-v y` and `-v z` (next to `-v c device` and `-v t 100`) and the input
+# is read from in.lj.kokkos.cuda on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.kokkos.cuda.<N>K.2.5, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_kcuda_t5() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 5 -sf kk -v c device -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.kokkos.cuda
+  mv log.lammps "log.28Jun14.lj.kokkos.cuda.${size_tag}.2.5"
+}
+
+lj_kcuda_t5 8 8 8
+lj_kcuda_t5 8 8 16
+lj_kcuda_t5 8 16 16
+lj_kcuda_t5 16 16 16
+lj_kcuda_t5 16 16 32
+lj_kcuda_t5 16 32 32
+lj_kcuda_t5 32 32 32
+lj_kcuda_t5 32 32 64
+lj_kcuda_t5 32 64 64
+lj_kcuda_t5 64 64 64
+lj_kcuda_t5 64 64 128
+lj_kcuda_t5 64 128 128
+lj_kcuda_t5 128 128 128
+
+# LJ size sweep for ./lmp_kokkos_cuda, launched with `mpirun -np 2` and
+# `-k on g 2 t 6 -sf kk`.
+# lj_kcuda_t6 X Y Z runs a single case: X, Y and Z are handed to the binary as
+# `-v x`, `-v y` and `-v z` (next to `-v c device` and `-v t 100`) and the input
+# is read from in.lj.kokkos.cuda on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.kokkos.cuda.<N>K.2.6, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_kcuda_t6() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 6 -sf kk -v c device -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.kokkos.cuda
+  mv log.lammps "log.28Jun14.lj.kokkos.cuda.${size_tag}.2.6"
+}
+
+lj_kcuda_t6 8 8 8
+lj_kcuda_t6 8 8 16
+lj_kcuda_t6 8 16 16
+lj_kcuda_t6 16 16 16
+lj_kcuda_t6 16 16 32
+lj_kcuda_t6 16 32 32
+lj_kcuda_t6 32 32 32
+lj_kcuda_t6 32 32 64
+lj_kcuda_t6 32 64 64
+lj_kcuda_t6 64 64 64
+lj_kcuda_t6 64 64 128
+lj_kcuda_t6 64 128 128
+lj_kcuda_t6 128 128 128
+
+# LJ size sweep for ./lmp_kokkos_cuda, launched with `mpirun -np 2` and
+# `-k on g 2 t 7 -sf kk`.
+# lj_kcuda_t7 X Y Z runs a single case: X, Y and Z are handed to the binary as
+# `-v x`, `-v y` and `-v z` (next to `-v c device` and `-v t 100`) and the input
+# is read from in.lj.kokkos.cuda on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.kokkos.cuda.<N>K.2.7, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_kcuda_t7() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 7 -sf kk -v c device -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.kokkos.cuda
+  mv log.lammps "log.28Jun14.lj.kokkos.cuda.${size_tag}.2.7"
+}
+
+lj_kcuda_t7 8 8 8
+lj_kcuda_t7 8 8 16
+lj_kcuda_t7 8 16 16
+lj_kcuda_t7 16 16 16
+lj_kcuda_t7 16 16 32
+lj_kcuda_t7 16 32 32
+lj_kcuda_t7 32 32 32
+lj_kcuda_t7 32 32 64
+lj_kcuda_t7 32 64 64
+lj_kcuda_t7 64 64 64
+lj_kcuda_t7 64 64 128
+lj_kcuda_t7 64 128 128
+lj_kcuda_t7 128 128 128
+
+# LJ size sweep for ./lmp_kokkos_cuda, launched with `mpirun -np 2` and
+# `-k on g 2 t 8 -sf kk`.
+# lj_kcuda_t8 X Y Z runs a single case: X, Y and Z are handed to the binary as
+# `-v x`, `-v y` and `-v z` (next to `-v c device` and `-v t 100`) and the input
+# is read from in.lj.kokkos.cuda on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.kokkos.cuda.<N>K.2.8, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_kcuda_t8() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -np 2 ./lmp_kokkos_cuda -k on g 2 t 8 -sf kk -v c device -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.kokkos.cuda
+  mv log.lammps "log.28Jun14.lj.kokkos.cuda.${size_tag}.2.8"
+}
+
+lj_kcuda_t8 8 8 8
+lj_kcuda_t8 8 8 16
+lj_kcuda_t8 8 16 16
+lj_kcuda_t8 16 16 16
+lj_kcuda_t8 16 16 32
+lj_kcuda_t8 16 32 32
+lj_kcuda_t8 32 32 32
+lj_kcuda_t8 32 32 64
+lj_kcuda_t8 32 64 64
+lj_kcuda_t8 64 64 64
+lj_kcuda_t8 64 64 128
+lj_kcuda_t8 64 128 128
+lj_kcuda_t8 128 128 128
diff --git a/bench/KEPLER/run_kokkos_omp.sh b/bench/KEPLER/run_kokkos_omp.sh
new file mode 100644
index 000000000..b1fa78c79
--- /dev/null
+++ b/bench/KEPLER/run_kokkos_omp.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+#SBATCH -N 1 --time=12:00:00
+
+# LJ size sweep for ./lmp_kokkos_omp, launched with `mpirun -np 1 -bind-to
+# socket -map-by socket -x KMP_AFFINITY=scatter` and `-k on t 8 -sf kk`.
+# lj_komp_1x8 X Y Z runs a single case: X, Y and Z are handed to the binary as
+# `-v x`, `-v y` and `-v z` (next to `-v c device` and `-v t 100`) and the input
+# is read from in.lj.kokkos.omp on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.kokkos.omp.<N>K.1.8, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_komp_1x8() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -np 1 -bind-to socket -map-by socket -x KMP_AFFINITY=scatter ./lmp_kokkos_omp -k on t 8 -sf kk -v c device -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.kokkos.omp
+  mv log.lammps "log.28Jun14.lj.kokkos.omp.${size_tag}.1.8"
+}
+
+lj_komp_1x8 8 8 8
+lj_komp_1x8 8 8 16
+lj_komp_1x8 8 16 16
+lj_komp_1x8 16 16 16
+lj_komp_1x8 16 16 32
+lj_komp_1x8 16 32 32
+lj_komp_1x8 32 32 32
+lj_komp_1x8 32 32 64
+lj_komp_1x8 32 64 64
+lj_komp_1x8 64 64 64
+lj_komp_1x8 64 64 128
+lj_komp_1x8 64 128 128
+lj_komp_1x8 128 128 128
+
+# LJ size sweep for ./lmp_kokkos_omp, launched with `mpirun -np 2 -bind-to
+# socket -map-by socket -x KMP_AFFINITY=scatter` and `-k on t 8 -sf kk`.
+# lj_komp_2x8 X Y Z runs a single case: X, Y and Z are handed to the binary as
+# `-v x`, `-v y` and `-v z` (next to `-v c device` and `-v t 100`) and the input
+# is read from in.lj.kokkos.omp on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.kokkos.omp.<N>K.2.8, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_komp_2x8() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -np 2 -bind-to socket -map-by socket -x KMP_AFFINITY=scatter ./lmp_kokkos_omp -k on t 8 -sf kk -v c device -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.kokkos.omp
+  mv log.lammps "log.28Jun14.lj.kokkos.omp.${size_tag}.2.8"
+}
+
+lj_komp_2x8 8 8 8
+lj_komp_2x8 8 8 16
+lj_komp_2x8 8 16 16
+lj_komp_2x8 16 16 16
+lj_komp_2x8 16 16 32
+lj_komp_2x8 16 32 32
+lj_komp_2x8 32 32 32
+lj_komp_2x8 32 32 64
+lj_komp_2x8 32 64 64
+lj_komp_2x8 64 64 64
+lj_komp_2x8 64 64 128
+lj_komp_2x8 64 128 128
+lj_komp_2x8 128 128 128
+
+# LJ size sweep for ./lmp_kokkos_omp, launched with `mpirun -np 4 -bind-to
+# socket -map-by socket -x KMP_AFFINITY=scatter` and `-k on t 4 -sf kk`.
+# lj_komp_4x4 X Y Z runs a single case: X, Y and Z are handed to the binary as
+# `-v x`, `-v y` and `-v z` (next to `-v c device` and `-v t 100`) and the input
+# is read from in.lj.kokkos.omp on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.kokkos.omp.<N>K.4.4, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_komp_4x4() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -np 4 -bind-to socket -map-by socket -x KMP_AFFINITY=scatter ./lmp_kokkos_omp -k on t 4 -sf kk -v c device -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.kokkos.omp
+  mv log.lammps "log.28Jun14.lj.kokkos.omp.${size_tag}.4.4"
+}
+
+lj_komp_4x4 8 8 8
+lj_komp_4x4 8 8 16
+lj_komp_4x4 8 16 16
+lj_komp_4x4 16 16 16
+lj_komp_4x4 16 16 32
+lj_komp_4x4 16 32 32
+lj_komp_4x4 32 32 32
+lj_komp_4x4 32 32 64
+lj_komp_4x4 32 64 64
+lj_komp_4x4 64 64 64
+lj_komp_4x4 64 64 128
+lj_komp_4x4 64 128 128
+lj_komp_4x4 128 128 128
+
+# LJ size sweep for ./lmp_kokkos_omp, launched with `mpirun -np 8 -bind-to
+# socket -map-by socket -x KMP_AFFINITY=scatter` and `-k on t 2 -sf kk`.
+# lj_komp_8x2 X Y Z runs a single case: X, Y and Z are handed to the binary as
+# `-v x`, `-v y` and `-v z` (next to `-v c device` and `-v t 100`) and the input
+# is read from in.lj.kokkos.omp on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.kokkos.omp.<N>K.8.2, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_komp_8x2() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -np 8 -bind-to socket -map-by socket -x KMP_AFFINITY=scatter ./lmp_kokkos_omp -k on t 2 -sf kk -v c device -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.kokkos.omp
+  mv log.lammps "log.28Jun14.lj.kokkos.omp.${size_tag}.8.2"
+}
+
+lj_komp_8x2 8 8 8
+lj_komp_8x2 8 8 16
+lj_komp_8x2 8 16 16
+lj_komp_8x2 16 16 16
+lj_komp_8x2 16 16 32
+lj_komp_8x2 16 32 32
+lj_komp_8x2 32 32 32
+lj_komp_8x2 32 32 64
+lj_komp_8x2 32 64 64
+lj_komp_8x2 64 64 64
+lj_komp_8x2 64 64 128
+lj_komp_8x2 64 128 128
+lj_komp_8x2 128 128 128
+
+# LJ size sweep for ./lmp_kokkos_omp with `mpirun -np 16 -bind-to core -map-by
+# core` and `-k on t 1 -sf kk`.  lj_komp_16x1 X Y Z runs one case, passing X, Y,
+# Z as `-v x`, `-v y`, `-v z`, then renames log.lammps to
+# log.28Jun14.lj.kokkos.omp.<N>K.16.1 with N = X*Y*Z/256 (2K ... 8192K).
+# NOTE(review): stdin here is in.lj.kokkos.omphalf, whereas the other groups of
+# this script read in.lj.kokkos.omp, and the log names still say lj.kokkos.omp;
+# confirm that this input file exists and that the difference is intended.
+lj_komp_16x1() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -np 16 -bind-to core -map-by core -x KMP_AFFINITY=scatter ./lmp_kokkos_omp -k on t 1 -sf kk -v c device -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.kokkos.omphalf
+  mv log.lammps "log.28Jun14.lj.kokkos.omp.${size_tag}.16.1"
+}
+
+lj_komp_16x1 8 8 8
+lj_komp_16x1 8 8 16
+lj_komp_16x1 8 16 16
+lj_komp_16x1 16 16 16
+lj_komp_16x1 16 16 32
+lj_komp_16x1 16 32 32
+lj_komp_16x1 32 32 32
+lj_komp_16x1 32 32 64
+lj_komp_16x1 32 64 64
+lj_komp_16x1 64 64 64
+lj_komp_16x1 64 64 128
+lj_komp_16x1 64 128 128
+lj_komp_16x1 128 128 128
diff --git a/bench/KEPLER/run_opt.sh b/bench/KEPLER/run_opt.sh
new file mode 100644
index 000000000..30d80a999
--- /dev/null
+++ b/bench/KEPLER/run_opt.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#SBATCH -N 1 --time=12:00:00
+
+# LJ size sweep for lmp_opt, launched with `mpirun -np 16` and `-sf opt`.
+# lj_opt_16 X Y Z runs a single case: X, Y and Z are handed to the binary as
+# `-v x`, `-v y` and `-v z` (next to `-v t 100`), the input is read from
+# in.lj.opt on stdin, and log.lammps is then renamed to
+# log.28Jun14.lj.opt.<N>K.16 with N = X*Y*Z/256 (labels 2K ... 8192K).
+# The binary is addressed as ./lmp_opt so that the copy sitting next to
+# in.lj.opt is the one that runs, instead of whatever a PATH search finds.
+lj_opt_16() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -np 16 ./lmp_opt -sf opt -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.opt
+  mv log.lammps "log.28Jun14.lj.opt.${size_tag}.16"
+}
+
+lj_opt_16 8 8 8
+lj_opt_16 8 8 16
+lj_opt_16 8 16 16
+lj_opt_16 16 16 16
+lj_opt_16 16 16 32
+lj_opt_16 16 32 32
+lj_opt_16 32 32 32
+lj_opt_16 32 32 64
+lj_opt_16 32 64 64
+lj_opt_16 64 64 64
+lj_opt_16 64 64 128
+lj_opt_16 64 128 128
+lj_opt_16 128 128 128
diff --git a/bench/KEPLER/run_user_cuda.sh b/bench/KEPLER/run_user_cuda.sh
new file mode 100644
index 000000000..7e76177bb
--- /dev/null
+++ b/bench/KEPLER/run_user_cuda.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+#SBATCH -N 1 --time=12:00:00
+
+# LJ size sweep for ./lmp_cuda_double, launched with `mpirun -N 1` and
+# `-c on -sf cuda -v g 1`.
+# lj_cuda_double_1 X Y Z runs a single case: X, Y and Z are handed to the
+# binary as `-v x`, `-v y` and `-v z` (next to `-v t 100`) and the input is
+# read from in.lj.cuda on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.cuda.double.<N>K.1, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_cuda_double_1() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -N 1 ./lmp_cuda_double -c on -sf cuda -v g 1 -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.cuda
+  mv log.lammps "log.28Jun14.lj.cuda.double.${size_tag}.1"
+}
+
+lj_cuda_double_1 8 8 8
+lj_cuda_double_1 8 8 16
+lj_cuda_double_1 8 16 16
+lj_cuda_double_1 16 16 16
+lj_cuda_double_1 16 16 32
+lj_cuda_double_1 16 32 32
+lj_cuda_double_1 32 32 32
+lj_cuda_double_1 32 32 64
+lj_cuda_double_1 32 64 64
+lj_cuda_double_1 64 64 64
+lj_cuda_double_1 64 64 128
+lj_cuda_double_1 64 128 128
+lj_cuda_double_1 128 128 128
+
+# LJ size sweep for ./lmp_cuda_double, launched with `mpirun -N 2` and
+# `-c on -sf cuda -v g 2`.
+# lj_cuda_double_2 X Y Z runs a single case: X, Y and Z are handed to the
+# binary as `-v x`, `-v y` and `-v z` (next to `-v t 100`) and the input is
+# read from in.lj.cuda on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.cuda.double.<N>K.2, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_cuda_double_2() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -N 2 ./lmp_cuda_double -c on -sf cuda -v g 2 -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.cuda
+  mv log.lammps "log.28Jun14.lj.cuda.double.${size_tag}.2"
+}
+
+lj_cuda_double_2 8 8 8
+lj_cuda_double_2 8 8 16
+lj_cuda_double_2 8 16 16
+lj_cuda_double_2 16 16 16
+lj_cuda_double_2 16 16 32
+lj_cuda_double_2 16 32 32
+lj_cuda_double_2 32 32 32
+lj_cuda_double_2 32 32 64
+lj_cuda_double_2 32 64 64
+lj_cuda_double_2 64 64 64
+lj_cuda_double_2 64 64 128
+lj_cuda_double_2 64 128 128
+lj_cuda_double_2 128 128 128
+
+# LJ size sweep for ./lmp_cuda_mixed, launched with `mpirun -N 1` and
+# `-c on -sf cuda -v g 1`.
+# lj_cuda_mixed_1 X Y Z runs a single case: X, Y and Z are handed to the
+# binary as `-v x`, `-v y` and `-v z` (next to `-v t 100`) and the input is
+# read from in.lj.cuda on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.cuda.mixed.<N>K.1, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_cuda_mixed_1() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -N 1 ./lmp_cuda_mixed -c on -sf cuda -v g 1 -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.cuda
+  mv log.lammps "log.28Jun14.lj.cuda.mixed.${size_tag}.1"
+}
+
+lj_cuda_mixed_1 8 8 8
+lj_cuda_mixed_1 8 8 16
+lj_cuda_mixed_1 8 16 16
+lj_cuda_mixed_1 16 16 16
+lj_cuda_mixed_1 16 16 32
+lj_cuda_mixed_1 16 32 32
+lj_cuda_mixed_1 32 32 32
+lj_cuda_mixed_1 32 32 64
+lj_cuda_mixed_1 32 64 64
+lj_cuda_mixed_1 64 64 64
+lj_cuda_mixed_1 64 64 128
+lj_cuda_mixed_1 64 128 128
+lj_cuda_mixed_1 128 128 128
+
+# LJ size sweep for ./lmp_cuda_mixed, launched with `mpirun -N 2` and
+# `-c on -sf cuda -v g 2`.
+# lj_cuda_mixed_2 X Y Z runs a single case: X, Y and Z are handed to the
+# binary as `-v x`, `-v y` and `-v z` (next to `-v t 100`) and the input is
+# read from in.lj.cuda on stdin.  Afterwards log.lammps is renamed to
+# log.28Jun14.lj.cuda.mixed.<N>K.2, where N = X*Y*Z/256; for the 13 cases
+# listed below that gives the labels 2K, 4K, ... 8192K.
+lj_cuda_mixed_2() {
+  size_tag="$(( $1 * $2 * $3 / 256 ))K"
+  mpirun -N 2 ./lmp_cuda_mixed -c on -sf cuda -v g 2 -v x "$1" -v y "$2" -v z "$3" -v t 100 < in.lj.cuda
+  mv log.lammps "log.28Jun14.lj.cuda.mixed.${size_tag}.2"
+}
+
+lj_cuda_mixed_2 8 8 8
+lj_cuda_mixed_2 8 8 16
+lj_cuda_mixed_2 8 16 16
+lj_cuda_mixed_2 16 16 16
+lj_cuda_mixed_2 16 16 32
+lj_cuda_mixed_2 16 32 32
+lj_cuda_mixed_2 32 32 32
+lj_cuda_mixed_2 32 32 64
+lj_cuda_mixed_2 32 64 64
+lj_cuda_mixed_2 64 64 64
+lj_cuda_mixed_2 64 64 128
+lj_cuda_mixed_2 64 128 128
+lj_cuda_mixed_2 128 128 128
+
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.cuda  # group: lmp_cuda_single with -N 1 and g=1; x/y/z grow from 8x8x8 to 128x128x128, t stays 100
+mv log.lammps log.28Jun14.lj.cuda.single.2K.1  # every run is followed by a rename of log.lammps; suffix is <size tag>.<-N value>
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.cuda  # NOTE(review): g presumably selects the GPU count -- confirm in in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.4K.1  # size tag equals 4*x*y*z/1024 followed by "K" on every line of this group
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.8K.1
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.16K.1
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.32K.1
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.64K.1
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.128K.1
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.256K.1
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.512K.1
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.1024K.1
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.2048K.1
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.4096K.1
+mpirun -N 1 ./lmp_cuda_single -c on -sf cuda -v g 1 -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.cuda  # largest box of this group
+mv log.lammps log.28Jun14.lj.cuda.single.8192K.1
+
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 8 -v y 8 -v z 8 -v t 100 < in.lj.cuda  # group: lmp_cuda_single with -N 2 and g=2; x/y/z grow from 8x8x8 to 128x128x128, t stays 100
+mv log.lammps log.28Jun14.lj.cuda.single.2K.2  # every run is followed by a rename of log.lammps; suffix is <size tag>.<-N value>
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 8 -v y 8 -v z 16 -v t 100 < in.lj.cuda  # NOTE(review): g presumably selects the GPU count -- confirm in in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.4K.2  # size tag equals 4*x*y*z/1024 followed by "K" on every line of this group
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 8 -v y 16 -v z 16 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.8K.2
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 16 -v y 16 -v z 16 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.16K.2
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 16 -v y 16 -v z 32 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.32K.2
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 16 -v y 32 -v z 32 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.64K.2
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 32 -v y 32 -v z 32 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.128K.2
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.256K.2
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.512K.2
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 64 -v y 64 -v z 64 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.1024K.2
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 64 -v y 64 -v z 128 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.2048K.2
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 64 -v y 128 -v z 128 -v t 100 < in.lj.cuda
+mv log.lammps log.28Jun14.lj.cuda.single.4096K.2
+mpirun -N 2 ./lmp_cuda_single -c on -sf cuda -v g 2 -v x 128 -v y 128 -v z 128 -v t 100 < in.lj.cuda  # largest box of this group
+mv log.lammps log.28Jun14.lj.cuda.single.8192K.2
diff --git a/bench/KEPLER/run_user_omp.sh b/bench/KEPLER/run_user_omp.sh
new file mode 100644
index 000000000..0b71f15b5
--- /dev/null
+++ b/bench/KEPLER/run_user_omp.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#SBATCH -N 1 --time=12:00:00
+
+# Runs ./lmp_omp on in.lj.omp once for every (layout, box size) pair below and
+# renames each resulting log.lammps to
+#   log.28Jun14.lj.omp.<size tag>.<-N value>.<-v h value>
+# Layouts are the outer loop and sizes the inner loop.
+
+# Layouts, one per entry: "<mpirun -bind-to policy> <mpirun -N value> <-v h value>".
+# NOTE(review): h is presumably the OpenMP thread count per rank (N*h is 16 in
+# every entry) -- confirm against in.lj.omp.
+layouts=(
+  "none 1 16"
+  "socket 2 8"
+  "socket 4 4"
+  "socket 8 2"
+  "core 16 1"
+)
+
+# Box sizes, one per entry: "<-v x> <-v y> <-v z> <size tag>".
+# Each tag equals 4*x*y*z/1024 followed by "K".
+sizes=(
+  "8 8 8 2K"
+  "8 8 16 4K"
+  "8 16 16 8K"
+  "16 16 16 16K"
+  "16 16 32 32K"
+  "16 32 32 64K"
+  "32 32 32 128K"
+  "32 32 64 256K"
+  "32 64 64 512K"
+  "64 64 64 1024K"
+  "64 64 128 2048K"
+  "64 128 128 4096K"
+  "128 128 128 8192K"
+)
+
+for layout in "${layouts[@]}"; do
+  read -r bind ranks threads <<< "$layout"
+  for size in "${sizes[@]}"; do
+    read -r x y z tag <<< "$size"
+    mpirun -bind-to "$bind" -N "$ranks" ./lmp_omp -v x "$x" -v y "$y" -v z "$z" -v t 100 -v h "$threads" -sf omp < in.lj.omp
+    mv log.lammps "log.28Jun14.lj.omp.$tag.$ranks.$threads"
+  done
+done