diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile
index 74cee0ee0..004b38764 100644
--- a/lib/gpu/Nvidia.makefile
+++ b/lib/gpu/Nvidia.makefile
@@ -1,764 +1,770 @@
 CUDA  = $(NVCC) $(CUDA_INCLUDE) $(CUDA_OPTS) -Icudpp_mini $(CUDA_ARCH) \
              $(CUDA_PRECISION)
 CUDR  = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_PRECISION) $(CUDA_INCLUDE) \
          $(CUDPP_OPT)
 CUDA_LINK = $(CUDA_LIB) -lcudart
 BIN2C = $(CUDA_HOME)/bin/bin2c
 
 GPU_LIB = $(LIB_DIR)/libgpu.a
 
 # Headers for Geryon
 UCL_H  = $(wildcard ./geryon/ucl*.h)
 NVC_H  = $(wildcard ./geryon/nvc*.h) $(UCL_H)
 NVD_H  = $(wildcard ./geryon/nvd*.h) $(UCL_H) lal_preprocessor.h
 # Headers for Pair Stuff
 PAIR_H  = lal_atom.h lal_answer.h lal_neighbor_shared.h \
           lal_neighbor.h lal_precision.h lal_device.h \
           lal_balance.h lal_pppm.h
 
 ALL_H = $(NVD_H) $(PAIR_H)
 
 EXECS = $(BIN_DIR)/nvc_get_devices
 ifdef CUDPP_OPT
 CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \
         $(OBJ_DIR)/cudpp_maximal_launch.o $(OBJ_DIR)/cudpp_plan_manager.o \
         $(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o
 endif
 OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
        $(OBJ_DIR)/lal_neighbor.o $(OBJ_DIR)/lal_neighbor_shared.o \
        $(OBJ_DIR)/lal_device.o $(OBJ_DIR)/lal_base_atomic.o \
        $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_base_ellipsoid.o \
        $(OBJ_DIR)/lal_base_dipole.o $(OBJ_DIR)/lal_base_three.o \
        $(OBJ_DIR)/lal_base_dpd.o \
        $(OBJ_DIR)/lal_pppm.o $(OBJ_DIR)/lal_pppm_ext.o \
        $(OBJ_DIR)/lal_gayberne.o $(OBJ_DIR)/lal_gayberne_ext.o \
        $(OBJ_DIR)/lal_re_squared.o $(OBJ_DIR)/lal_re_squared_ext.o \
        $(OBJ_DIR)/lal_lj.o $(OBJ_DIR)/lal_lj_ext.o \
        $(OBJ_DIR)/lal_lj96.o $(OBJ_DIR)/lal_lj96_ext.o \
        $(OBJ_DIR)/lal_lj_expand.o $(OBJ_DIR)/lal_lj_expand_ext.o \
        $(OBJ_DIR)/lal_lj_coul.o $(OBJ_DIR)/lal_lj_coul_ext.o \
        $(OBJ_DIR)/lal_lj_coul_long.o $(OBJ_DIR)/lal_lj_coul_long_ext.o \
        $(OBJ_DIR)/lal_lj_dsf.o $(OBJ_DIR)/lal_lj_dsf_ext.o \
        $(OBJ_DIR)/lal_lj_class2_long.o $(OBJ_DIR)/lal_lj_class2_long_ext.o \
        $(OBJ_DIR)/lal_coul_long.o $(OBJ_DIR)/lal_coul_long_ext.o \
        $(OBJ_DIR)/lal_morse.o $(OBJ_DIR)/lal_morse_ext.o \
        $(OBJ_DIR)/lal_charmm_long.o $(OBJ_DIR)/lal_charmm_long_ext.o \
        $(OBJ_DIR)/lal_cg_cmm.o $(OBJ_DIR)/lal_cg_cmm_ext.o \
        $(OBJ_DIR)/lal_cg_cmm_long.o $(OBJ_DIR)/lal_cg_cmm_long_ext.o \
        $(OBJ_DIR)/lal_eam.o $(OBJ_DIR)/lal_eam_ext.o \
+       $(OBJ_DIR)/lal_eam_fs_ext.o $(OBJ_DIR)/lal_eam_alloy_ext.o \
        $(OBJ_DIR)/lal_buck.o $(OBJ_DIR)/lal_buck_ext.o \
        $(OBJ_DIR)/lal_buck_coul.o $(OBJ_DIR)/lal_buck_coul_ext.o \
        $(OBJ_DIR)/lal_buck_coul_long.o $(OBJ_DIR)/lal_buck_coul_long_ext.o \
        $(OBJ_DIR)/lal_table.o $(OBJ_DIR)/lal_table_ext.o \
        $(OBJ_DIR)/lal_yukawa.o $(OBJ_DIR)/lal_yukawa_ext.o \
        $(OBJ_DIR)/lal_born.o $(OBJ_DIR)/lal_born_ext.o \
        $(OBJ_DIR)/lal_born_coul_wolf.o $(OBJ_DIR)/lal_born_coul_wolf_ext.o \
        $(OBJ_DIR)/lal_born_coul_long.o $(OBJ_DIR)/lal_born_coul_long_ext.o \
        $(OBJ_DIR)/lal_dipole_lj.o $(OBJ_DIR)/lal_dipole_lj_ext.o \
        $(OBJ_DIR)/lal_dipole_lj_sf.o $(OBJ_DIR)/lal_dipole_lj_sf_ext.o \
        $(OBJ_DIR)/lal_colloid.o $(OBJ_DIR)/lal_colloid_ext.o \
        $(OBJ_DIR)/lal_gauss.o $(OBJ_DIR)/lal_gauss_ext.o \
        $(OBJ_DIR)/lal_yukawa_colloid.o $(OBJ_DIR)/lal_yukawa_colloid_ext.o \
        $(OBJ_DIR)/lal_lj_coul_debye.o $(OBJ_DIR)/lal_lj_coul_debye_ext.o \
        $(OBJ_DIR)/lal_coul_dsf.o $(OBJ_DIR)/lal_coul_dsf_ext.o \
        $(OBJ_DIR)/lal_sw.o $(OBJ_DIR)/lal_sw_ext.o \
        $(OBJ_DIR)/lal_beck.o $(OBJ_DIR)/lal_beck_ext.o \
        $(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \
        $(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
        $(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
        $(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
        $(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
        $(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
        $(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
        $(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
        $(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
        $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o
 
 CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
        $(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
        $(OBJ_DIR)/neighbor_cpu.cubin $(OBJ_DIR)/neighbor_cpu_cubin.h \
        $(OBJ_DIR)/neighbor_gpu.cubin $(OBJ_DIR)/neighbor_gpu_cubin.h \
        $(OBJ_DIR)/pppm_f.cubin $(OBJ_DIR)/pppm_f_cubin.h \
        $(OBJ_DIR)/pppm_d.cubin $(OBJ_DIR)/pppm_d_cubin.h \
        $(OBJ_DIR)/ellipsoid_nbor.cubin $(OBJ_DIR)/ellipsoid_nbor_cubin.h \
        $(OBJ_DIR)/gayberne.cubin $(OBJ_DIR)/gayberne_lj.cubin \
        $(OBJ_DIR)/gayberne_cubin.h $(OBJ_DIR)/gayberne_lj_cubin.h \
        $(OBJ_DIR)/re_squared.cubin $(OBJ_DIR)/re_squared_lj.cubin \
        $(OBJ_DIR)/re_squared_cubin.h $(OBJ_DIR)/re_squared_lj_cubin.h \
        $(OBJ_DIR)/lj.cubin $(OBJ_DIR)/lj_cubin.h \
        $(OBJ_DIR)/lj96.cubin $(OBJ_DIR)/lj96_cubin.h \
        $(OBJ_DIR)/lj_expand.cubin $(OBJ_DIR)/lj_expand_cubin.h \
        $(OBJ_DIR)/lj_coul.cubin $(OBJ_DIR)/lj_coul_cubin.h \
        $(OBJ_DIR)/lj_coul_long.cubin $(OBJ_DIR)/lj_coul_long_cubin.h \
        $(OBJ_DIR)/lj_dsf.cubin $(OBJ_DIR)/lj_dsf_cubin.h \
        $(OBJ_DIR)/lj_class2_long.cubin $(OBJ_DIR)/lj_class2_long_cubin.h \
        $(OBJ_DIR)/coul_long.cubin $(OBJ_DIR)/coul_long_cubin.h \
        $(OBJ_DIR)/morse.cubin $(OBJ_DIR)/morse_cubin.h \
        $(OBJ_DIR)/charmm_long.cubin $(OBJ_DIR)/charmm_long_cubin.h \
        $(OBJ_DIR)/cg_cmm.cubin $(OBJ_DIR)/cg_cmm_cubin.h \
        $(OBJ_DIR)/cg_cmm_long.cubin $(OBJ_DIR)/cg_cmm_long_cubin.h \
        $(OBJ_DIR)/eam.cubin $(OBJ_DIR)/eam_cubin.h \
        $(OBJ_DIR)/buck.cubin $(OBJ_DIR)/buck_cubin.h \
        $(OBJ_DIR)/buck_coul_long.cubin $(OBJ_DIR)/buck_coul_long_cubin.h \
        $(OBJ_DIR)/buck_coul.cubin $(OBJ_DIR)/buck_coul_cubin.h \
        $(OBJ_DIR)/table.cubin $(OBJ_DIR)/table_cubin.h \
        $(OBJ_DIR)/yukawa.cubin $(OBJ_DIR)/yukawa_cubin.h \
        $(OBJ_DIR)/born.cubin $(OBJ_DIR)/born_cubin.h \
        $(OBJ_DIR)/born_coul_wolf.cubin $(OBJ_DIR)/born_coul_wolf_cubin.h \
        $(OBJ_DIR)/born_coul_long.cubin $(OBJ_DIR)/born_coul_long_cubin.h \
        $(OBJ_DIR)/dipole_lj.cubin $(OBJ_DIR)/dipole_lj_cubin.h \
        $(OBJ_DIR)/dipole_lj_sf.cubin $(OBJ_DIR)/dipole_lj_sf_cubin.h \
        $(OBJ_DIR)/colloid.cubin $(OBJ_DIR)/colloid_cubin.h \
        $(OBJ_DIR)/gauss.cubin $(OBJ_DIR)/gauss_cubin.h \
        $(OBJ_DIR)/yukawa_colloid.cubin $(OBJ_DIR)/yukawa_colloid_cubin.h \
        $(OBJ_DIR)/lj_coul_debye.cubin $(OBJ_DIR)/lj_coul_debye_cubin.h \
        $(OBJ_DIR)/coul_dsf.cubin $(OBJ_DIR)/coul_dsf_cubin.h \
        $(OBJ_DIR)/sw.cubin $(OBJ_DIR)/sw_cubin.h \
        $(OBJ_DIR)/beck.cubin $(OBJ_DIR)/beck_cubin.h \
        $(OBJ_DIR)/mie.cubin $(OBJ_DIR)/mie_cubin.h \
        $(OBJ_DIR)/soft.cubin $(OBJ_DIR)/soft_cubin.h \
        $(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm_cubin.h \
        $(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \
        $(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \
        $(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff_cubin.h \
-	     $(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
+       $(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
        $(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \
        $(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \
        $(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h
 
-
 all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
 
 $(OBJ_DIR):
 	mkdir -p $@
 
 $(OBJ_DIR)/cudpp.o: cudpp_mini/cudpp.cpp
 	$(CUDR) -o $@ -c cudpp_mini/cudpp.cpp -Icudpp_mini
 
 $(OBJ_DIR)/cudpp_plan.o: cudpp_mini/cudpp_plan.cpp
 	$(CUDR) -o $@ -c cudpp_mini/cudpp_plan.cpp -Icudpp_mini
 
 $(OBJ_DIR)/cudpp_maximal_launch.o: cudpp_mini/cudpp_maximal_launch.cpp
 	$(CUDR) -o $@ -c cudpp_mini/cudpp_maximal_launch.cpp -Icudpp_mini
 
 $(OBJ_DIR)/cudpp_plan_manager.o: cudpp_mini/cudpp_plan_manager.cpp
 	$(CUDR) -o $@ -c cudpp_mini/cudpp_plan_manager.cpp -Icudpp_mini
 
 $(OBJ_DIR)/radixsort_app.cu_o: cudpp_mini/radixsort_app.cu
 	$(CUDA) -o $@ -c cudpp_mini/radixsort_app.cu
 
 $(OBJ_DIR)/scan_app.cu_o: cudpp_mini/scan_app.cu
 	$(CUDA) -o $@ -c cudpp_mini/scan_app.cu
 
 $(OBJ_DIR)/atom.cubin: lal_atom.cu lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_atom.cu
 
 $(OBJ_DIR)/atom_cubin.h: $(OBJ_DIR)/atom.cubin
 	$(BIN2C) -c -n atom $(OBJ_DIR)/atom.cubin > $(OBJ_DIR)/atom_cubin.h
 
 $(OBJ_DIR)/lal_atom.o: lal_atom.cpp lal_atom.h $(NVD_H) $(OBJ_DIR)/atom_cubin.h
 	$(CUDR) -o $@ -c lal_atom.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_ans.o: lal_answer.cpp lal_answer.h $(NVD_H)
 	$(CUDR) -o $@ -c lal_answer.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/neighbor_cpu.cubin: lal_neighbor_cpu.cu lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_neighbor_cpu.cu
 
 $(OBJ_DIR)/neighbor_cpu_cubin.h: $(OBJ_DIR)/neighbor_cpu.cubin
 	$(BIN2C) -c -n neighbor_cpu $(OBJ_DIR)/neighbor_cpu.cubin > $(OBJ_DIR)/neighbor_cpu_cubin.h
 
 $(OBJ_DIR)/neighbor_gpu.cubin: lal_neighbor_gpu.cu lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_neighbor_gpu.cu
 
 $(OBJ_DIR)/neighbor_gpu_cubin.h: $(OBJ_DIR)/neighbor_gpu.cubin
 	$(BIN2C) -c -n neighbor_gpu $(OBJ_DIR)/neighbor_gpu.cubin > $(OBJ_DIR)/neighbor_gpu_cubin.h
 
 $(OBJ_DIR)/lal_neighbor_shared.o: lal_neighbor_shared.cpp lal_neighbor_shared.h $(OBJ_DIR)/neighbor_cpu_cubin.h $(OBJ_DIR)/neighbor_gpu_cubin.h $(NVD_H)
 	$(CUDR) -o $@ -c lal_neighbor_shared.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_neighbor.o: lal_neighbor.cpp lal_neighbor.h lal_neighbor_shared.h $(NVD_H)
 	$(CUDR) -o $@ -c lal_neighbor.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/device.cubin: lal_device.cu lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_device.cu
 
 $(OBJ_DIR)/device_cubin.h: $(OBJ_DIR)/device.cubin
 	$(BIN2C) -c -n device $(OBJ_DIR)/device.cubin > $(OBJ_DIR)/device_cubin.h
 
 $(OBJ_DIR)/lal_device.o: lal_device.cpp lal_device.h $(ALL_H) $(OBJ_DIR)/device_cubin.h
 	$(CUDR) -o $@ -c lal_device.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_base_atomic.o: $(ALL_H) lal_base_atomic.h lal_base_atomic.cpp
 	$(CUDR) -o $@ -c lal_base_atomic.cpp
 
 $(OBJ_DIR)/lal_base_charge.o: $(ALL_H) lal_base_charge.h lal_base_charge.cpp
 	$(CUDR) -o $@ -c lal_base_charge.cpp
 
 $(OBJ_DIR)/lal_base_ellipsoid.o: $(ALL_H) lal_base_ellipsoid.h lal_base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_cubin.h
 	$(CUDR) -o $@ -c lal_base_ellipsoid.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_base_dipole.o: $(ALL_H) lal_base_dipole.h lal_base_dipole.cpp
 	$(CUDR) -o $@ -c lal_base_dipole.cpp
 
 $(OBJ_DIR)/lal_base_three.o: $(ALL_H) lal_base_three.h lal_base_three.cpp
 	$(CUDR) -o $@ -c lal_base_three.cpp
 
 $(OBJ_DIR)/lal_base_dpd.o: $(ALL_H) lal_base_dpd.h lal_base_dpd.cpp
 	$(CUDR) -o $@ -c lal_base_dpd.cpp
 
 $(OBJ_DIR)/pppm_f.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ lal_pppm.cu
 
 $(OBJ_DIR)/pppm_f_cubin.h: $(OBJ_DIR)/pppm_f.cubin
 	$(BIN2C) -c -n pppm_f $(OBJ_DIR)/pppm_f.cubin > $(OBJ_DIR)/pppm_f_cubin.h
 
 $(OBJ_DIR)/pppm_d.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -Dgrdtyp=double -Dgrdtyp4=double4 -o $@ lal_pppm.cu
 
 $(OBJ_DIR)/pppm_d_cubin.h: $(OBJ_DIR)/pppm_d.cubin
 	$(BIN2C) -c -n pppm_d $(OBJ_DIR)/pppm_d.cubin > $(OBJ_DIR)/pppm_d_cubin.h
 
 $(OBJ_DIR)/lal_pppm.o: $(ALL_H) lal_pppm.h lal_pppm.cpp $(OBJ_DIR)/pppm_f_cubin.h $(OBJ_DIR)/pppm_d_cubin.h
 	$(CUDR) -o $@ -c lal_pppm.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_pppm_ext.o: $(ALL_H) lal_pppm.h lal_pppm_ext.cpp
 	$(CUDR) -o $@ -c lal_pppm_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/ellipsoid_nbor.cubin: lal_ellipsoid_nbor.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_ellipsoid_nbor.cu
 
 $(OBJ_DIR)/ellipsoid_nbor_cubin.h: $(OBJ_DIR)/ellipsoid_nbor.cubin
 	$(BIN2C) -c -n ellipsoid_nbor $(OBJ_DIR)/ellipsoid_nbor.cubin > $(OBJ_DIR)/ellipsoid_nbor_cubin.h
 
 $(OBJ_DIR)/gayberne.cubin: lal_gayberne.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_gayberne.cu
 
 $(OBJ_DIR)/gayberne_lj.cubin: lal_gayberne_lj.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_gayberne_lj.cu
 
 $(OBJ_DIR)/gayberne_cubin.h: $(OBJ_DIR)/gayberne.cubin
 	$(BIN2C) -c -n gayberne $(OBJ_DIR)/gayberne.cubin > $(OBJ_DIR)/gayberne_cubin.h
 
 $(OBJ_DIR)/gayberne_lj_cubin.h: $(OBJ_DIR)/gayberne_lj.cubin
 	$(BIN2C) -c -n gayberne_lj $(OBJ_DIR)/gayberne_lj.cubin > $(OBJ_DIR)/gayberne_lj_cubin.h
 
 $(OBJ_DIR)/lal_gayberne.o: $(ALL_H) lal_gayberne.h lal_gayberne.cpp $(OBJ_DIR)/gayberne_cubin.h $(OBJ_DIR)/gayberne_lj_cubin.h $(OBJ_DIR)/lal_base_ellipsoid.o
 	$(CUDR) -o $@ -c lal_gayberne.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/lal_gayberne.o lal_gayberne_ext.cpp
 	$(CUDR) -o $@ -c lal_gayberne_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/re_squared.cubin: lal_re_squared.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_re_squared.cu
 
 $(OBJ_DIR)/re_squared_lj.cubin: lal_re_squared_lj.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_re_squared_lj.cu
 
 $(OBJ_DIR)/re_squared_cubin.h: $(OBJ_DIR)/re_squared.cubin
 	$(BIN2C) -c -n re_squared $(OBJ_DIR)/re_squared.cubin > $(OBJ_DIR)/re_squared_cubin.h
 
 $(OBJ_DIR)/re_squared_lj_cubin.h: $(OBJ_DIR)/re_squared_lj.cubin
 	$(BIN2C) -c -n re_squared_lj $(OBJ_DIR)/re_squared_lj.cubin > $(OBJ_DIR)/re_squared_lj_cubin.h
 
 $(OBJ_DIR)/lal_re_squared.o: $(ALL_H) lal_re_squared.h lal_re_squared.cpp $(OBJ_DIR)/re_squared_cubin.h $(OBJ_DIR)/re_squared_lj_cubin.h $(OBJ_DIR)/lal_base_ellipsoid.o
 	$(CUDR) -o $@ -c lal_re_squared.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/lal_re_squared.o lal_re_squared_ext.cpp
 	$(CUDR) -o $@ -c lal_re_squared_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lj.cubin: lal_lj.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj.cu
 
 $(OBJ_DIR)/lj_cubin.h: $(OBJ_DIR)/lj.cubin $(OBJ_DIR)/lj.cubin
 	$(BIN2C) -c -n lj $(OBJ_DIR)/lj.cubin > $(OBJ_DIR)/lj_cubin.h
 
 $(OBJ_DIR)/lal_lj.o: $(ALL_H) lal_lj.h lal_lj.cpp $(OBJ_DIR)/lj_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_lj.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_ext.o: $(ALL_H) lal_lj.h lal_lj_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_lj_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lj_coul.cubin: lal_lj_coul.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_coul.cu
 
 $(OBJ_DIR)/lj_coul_cubin.h: $(OBJ_DIR)/lj_coul.cubin $(OBJ_DIR)/lj_coul.cubin
 	$(BIN2C) -c -n lj_coul $(OBJ_DIR)/lj_coul.cubin > $(OBJ_DIR)/lj_coul_cubin.h
 
 $(OBJ_DIR)/lal_lj_coul.o: $(ALL_H) lal_lj_coul.h lal_lj_coul.cpp $(OBJ_DIR)/lj_coul_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c lal_lj_coul.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_coul_ext.o: $(ALL_H) lal_lj_coul.h lal_lj_coul_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_lj_coul_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lj_class2_long.cubin: lal_lj_class2_long.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_class2_long.cu
 
 $(OBJ_DIR)/lj_class2_long_cubin.h: $(OBJ_DIR)/lj_class2_long.cubin $(OBJ_DIR)/lj_class2_long.cubin
 	$(BIN2C) -c -n lj_class2_long $(OBJ_DIR)/lj_class2_long.cubin > $(OBJ_DIR)/lj_class2_long_cubin.h
 
 $(OBJ_DIR)/lal_lj_class2_long.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c lal_lj_class2_long.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_class2_long_ext.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_lj_class2_long_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/coul_long.cubin: lal_coul_long.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul_long.cu
 
 $(OBJ_DIR)/coul_long_cubin.h: $(OBJ_DIR)/coul_long.cubin $(OBJ_DIR)/coul_long.cubin
 	$(BIN2C) -c -n coul_long $(OBJ_DIR)/coul_long.cubin > $(OBJ_DIR)/coul_long_cubin.h
 
 $(OBJ_DIR)/lal_coul_long.o: $(ALL_H) lal_coul_long.h lal_coul_long.cpp $(OBJ_DIR)/coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c lal_coul_long.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_coul_long_ext.o: $(ALL_H) lal_coul_long.h lal_coul_long_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_coul_long_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lj_coul_long.cubin: lal_lj_coul_long.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_coul_long.cu
 
 $(OBJ_DIR)/lj_coul_long_cubin.h: $(OBJ_DIR)/lj_coul_long.cubin $(OBJ_DIR)/lj_coul_long.cubin
 	$(BIN2C) -c -n lj_coul_long $(OBJ_DIR)/lj_coul_long.cubin > $(OBJ_DIR)/lj_coul_long_cubin.h
 
 $(OBJ_DIR)/lal_lj_coul_long.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long.cpp $(OBJ_DIR)/lj_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c lal_lj_coul_long.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_coul_long_ext.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_lj_coul_long_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lj_dsf.cubin: lal_lj_dsf.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_dsf.cu
 
 $(OBJ_DIR)/lj_dsf_cubin.h: $(OBJ_DIR)/lj_dsf.cubin $(OBJ_DIR)/lj_dsf.cubin
 	$(BIN2C) -c -n lj_dsf $(OBJ_DIR)/lj_dsf.cubin > $(OBJ_DIR)/lj_dsf_cubin.h
 
 $(OBJ_DIR)/lal_lj_dsf.o: $(ALL_H) lal_lj_dsf.h lal_lj_dsf.cpp $(OBJ_DIR)/lj_dsf_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c lal_lj_dsf.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_dsf_ext.o: $(ALL_H) lal_lj_dsf.h lal_lj_dsf_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_lj_dsf_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/morse.cubin: lal_morse.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_morse.cu
 
 $(OBJ_DIR)/morse_cubin.h: $(OBJ_DIR)/morse.cubin $(OBJ_DIR)/morse.cubin
 	$(BIN2C) -c -n morse $(OBJ_DIR)/morse.cubin > $(OBJ_DIR)/morse_cubin.h
 
 $(OBJ_DIR)/lal_morse.o: $(ALL_H) lal_morse.h lal_morse.cpp $(OBJ_DIR)/morse_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_morse.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_morse_ext.o: $(ALL_H) lal_morse.h lal_morse_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_morse_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/charmm_long.cubin: lal_charmm_long.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_charmm_long.cu
 
 $(OBJ_DIR)/charmm_long_cubin.h: $(OBJ_DIR)/charmm_long.cubin $(OBJ_DIR)/charmm_long.cubin
 	$(BIN2C) -c -n charmm_long $(OBJ_DIR)/charmm_long.cubin > $(OBJ_DIR)/charmm_long_cubin.h
 
 $(OBJ_DIR)/lal_charmm_long.o: $(ALL_H) lal_charmm_long.h lal_charmm_long.cpp $(OBJ_DIR)/charmm_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c lal_charmm_long.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_charmm_long_ext.o: $(ALL_H) lal_charmm_long.h lal_charmm_long_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_charmm_long_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lj96.cubin: lal_lj96.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj96.cu
 
 $(OBJ_DIR)/lj96_cubin.h: $(OBJ_DIR)/lj96.cubin $(OBJ_DIR)/lj96.cubin
 	$(BIN2C) -c -n lj96 $(OBJ_DIR)/lj96.cubin > $(OBJ_DIR)/lj96_cubin.h
 
 $(OBJ_DIR)/lal_lj96.o: $(ALL_H) lal_lj96.h lal_lj96.cpp $(OBJ_DIR)/lj96_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_lj96.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj96_ext.o: $(ALL_H) lal_lj96.h lal_lj96_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_lj96_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lj_expand.cubin: lal_lj_expand.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_expand.cu
 
 $(OBJ_DIR)/lj_expand_cubin.h: $(OBJ_DIR)/lj_expand.cubin $(OBJ_DIR)/lj_expand.cubin
 	$(BIN2C) -c -n lj_expand $(OBJ_DIR)/lj_expand.cubin > $(OBJ_DIR)/lj_expand_cubin.h
 
 $(OBJ_DIR)/lal_lj_expand.o: $(ALL_H) lal_lj_expand.h lal_lj_expand.cpp $(OBJ_DIR)/lj_expand_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_lj_expand.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_expand_ext.o: $(ALL_H) lal_lj_expand.h lal_lj_expand_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_lj_expand_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/cg_cmm.cubin: lal_cg_cmm.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_cg_cmm.cu
 
 $(OBJ_DIR)/cg_cmm_cubin.h: $(OBJ_DIR)/cg_cmm.cubin $(OBJ_DIR)/cg_cmm.cubin
 	$(BIN2C) -c -n cg_cmm $(OBJ_DIR)/cg_cmm.cubin > $(OBJ_DIR)/cg_cmm_cubin.h
 
 $(OBJ_DIR)/lal_cg_cmm.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm.cpp $(OBJ_DIR)/cg_cmm_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_cg_cmm.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_cg_cmm_ext.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_cg_cmm_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/cg_cmm_long.cubin: lal_cg_cmm_long.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_cg_cmm_long.cu
 
 $(OBJ_DIR)/cg_cmm_long_cubin.h: $(OBJ_DIR)/cg_cmm_long.cubin $(OBJ_DIR)/cg_cmm_long.cubin
 	$(BIN2C) -c -n cg_cmm_long $(OBJ_DIR)/cg_cmm_long.cubin > $(OBJ_DIR)/cg_cmm_long_cubin.h
 
 $(OBJ_DIR)/lal_cg_cmm_long.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long.cpp $(OBJ_DIR)/cg_cmm_long_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_cg_cmm_long.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_cg_cmm_long_ext.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_cg_cmm_long_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/eam.cubin: lal_eam.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_eam.cu
 
 $(OBJ_DIR)/eam_cubin.h: $(OBJ_DIR)/eam.cubin $(OBJ_DIR)/eam.cubin
 	$(BIN2C) -c -n eam $(OBJ_DIR)/eam.cubin > $(OBJ_DIR)/eam_cubin.h
 
 $(OBJ_DIR)/lal_eam.o: $(ALL_H) lal_eam.h lal_eam.cpp $(OBJ_DIR)/eam_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_eam.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_eam_ext.o: $(ALL_H) lal_eam.h lal_eam_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_eam_ext.cpp -I$(OBJ_DIR)
 
+$(OBJ_DIR)/lal_eam_fs_ext.o: $(ALL_H) lal_eam.h lal_eam_fs_ext.cpp lal_base_atomic.h # compiled from lal_eam_fs_ext.cpp; prerequisites mirror the lal_eam_ext.o rule, and -I$(OBJ_DIR) adds the object directory to the include path
+	$(CUDR) -o $@ -c lal_eam_fs_ext.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_eam_alloy_ext.o: $(ALL_H) lal_eam.h lal_eam_alloy_ext.cpp lal_base_atomic.h # compiled from lal_eam_alloy_ext.cpp; prerequisites mirror the lal_eam_ext.o rule, and -I$(OBJ_DIR) adds the object directory to the include path
+	$(CUDR) -o $@ -c lal_eam_alloy_ext.cpp -I$(OBJ_DIR)
+
 $(OBJ_DIR)/buck.cubin: lal_buck.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_buck.cu
 
 $(OBJ_DIR)/buck_cubin.h: $(OBJ_DIR)/buck.cubin $(OBJ_DIR)/buck.cubin
 	$(BIN2C) -c -n buck $(OBJ_DIR)/buck.cubin > $(OBJ_DIR)/buck_cubin.h
 
 $(OBJ_DIR)/lal_buck.o: $(ALL_H) lal_buck.h lal_buck.cpp $(OBJ_DIR)/buck_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_buck.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_buck_ext.o: $(ALL_H) lal_buck.h lal_buck_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_buck_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/buck_coul.cubin: lal_buck_coul.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_buck_coul.cu
 
 $(OBJ_DIR)/buck_coul_cubin.h: $(OBJ_DIR)/buck_coul.cubin $(OBJ_DIR)/buck_coul.cubin
 	$(BIN2C) -c -n buck_coul $(OBJ_DIR)/buck_coul.cubin > $(OBJ_DIR)/buck_coul_cubin.h
 
 $(OBJ_DIR)/lal_buck_coul.o: $(ALL_H) lal_buck_coul.h lal_buck_coul.cpp $(OBJ_DIR)/buck_coul_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c lal_buck_coul.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_buck_coul_ext.o: $(ALL_H) lal_buck_coul.h lal_buck_coul_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_buck_coul_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/buck_coul_long.cubin: lal_buck_coul_long.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_buck_coul_long.cu
 
 $(OBJ_DIR)/buck_coul_long_cubin.h: $(OBJ_DIR)/buck_coul_long.cubin $(OBJ_DIR)/buck_coul_long.cubin
 	$(BIN2C) -c -n buck_coul_long $(OBJ_DIR)/buck_coul_long.cubin > $(OBJ_DIR)/buck_coul_long_cubin.h
 
 $(OBJ_DIR)/lal_buck_coul_long.o: $(ALL_H) lal_buck_coul_long.h lal_buck_coul_long.cpp $(OBJ_DIR)/buck_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c lal_buck_coul_long.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_buck_coul_long_ext.o: $(ALL_H) lal_buck_coul_long.h lal_buck_coul_long_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_buck_coul_long_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/table.cubin: lal_table.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_table.cu
 
 $(OBJ_DIR)/table_cubin.h: $(OBJ_DIR)/table.cubin $(OBJ_DIR)/table.cubin
 	$(BIN2C) -c -n table $(OBJ_DIR)/table.cubin > $(OBJ_DIR)/table_cubin.h
 
 $(OBJ_DIR)/lal_table.o: $(ALL_H) lal_table.h lal_table.cpp $(OBJ_DIR)/table_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_table.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_table_ext.o: $(ALL_H) lal_table.h lal_table_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_table_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/yukawa.cubin: lal_yukawa.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_yukawa.cu
 
 $(OBJ_DIR)/yukawa_cubin.h: $(OBJ_DIR)/yukawa.cubin $(OBJ_DIR)/yukawa.cubin
 	$(BIN2C) -c -n yukawa $(OBJ_DIR)/yukawa.cubin > $(OBJ_DIR)/yukawa_cubin.h
 
 $(OBJ_DIR)/lal_yukawa.o: $(ALL_H) lal_yukawa.h lal_yukawa.cpp $(OBJ_DIR)/yukawa_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_yukawa.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_yukawa_ext.o: $(ALL_H) lal_yukawa.h lal_yukawa_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_yukawa_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/born.cubin: lal_born.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_born.cu
 
 $(OBJ_DIR)/born_cubin.h: $(OBJ_DIR)/born.cubin $(OBJ_DIR)/born.cubin
 	$(BIN2C) -c -n born $(OBJ_DIR)/born.cubin > $(OBJ_DIR)/born_cubin.h
 
 $(OBJ_DIR)/lal_born.o: $(ALL_H) lal_born.h lal_born.cpp $(OBJ_DIR)/born_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_born.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_born_ext.o: $(ALL_H) lal_born.h lal_born_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_born_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/born_coul_wolf.cubin: lal_born_coul_wolf.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_born_coul_wolf.cu
 
 $(OBJ_DIR)/born_coul_wolf_cubin.h: $(OBJ_DIR)/born_coul_wolf.cubin $(OBJ_DIR)/born_coul_wolf.cubin
 	$(BIN2C) -c -n born_coul_wolf $(OBJ_DIR)/born_coul_wolf.cubin > $(OBJ_DIR)/born_coul_wolf_cubin.h
 
 $(OBJ_DIR)/lal_born_coul_wolf.o: $(ALL_H) lal_born_coul_wolf.h lal_born_coul_wolf.cpp $(OBJ_DIR)/born_coul_wolf_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c lal_born_coul_wolf.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_born_coul_wolf_ext.o: $(ALL_H) lal_born_coul_wolf.h lal_born_coul_wolf_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_born_coul_wolf_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/born_coul_long.cubin: lal_born_coul_long.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_born_coul_long.cu
 
 $(OBJ_DIR)/born_coul_long_cubin.h: $(OBJ_DIR)/born_coul_long.cubin $(OBJ_DIR)/born_coul_long.cubin
 	$(BIN2C) -c -n born_coul_long $(OBJ_DIR)/born_coul_long.cubin > $(OBJ_DIR)/born_coul_long_cubin.h
 
 $(OBJ_DIR)/lal_born_coul_long.o: $(ALL_H) lal_born_coul_long.h lal_born_coul_long.cpp $(OBJ_DIR)/born_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c lal_born_coul_long.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_born_coul_long_ext.o: $(ALL_H) lal_born_coul_long.h lal_born_coul_long_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_born_coul_long_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/dipole_lj.cubin: lal_dipole_lj.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_dipole_lj.cu
 
 $(OBJ_DIR)/dipole_lj_cubin.h: $(OBJ_DIR)/dipole_lj.cubin $(OBJ_DIR)/dipole_lj.cubin
 	$(BIN2C) -c -n dipole_lj $(OBJ_DIR)/dipole_lj.cubin > $(OBJ_DIR)/dipole_lj_cubin.h
 
 $(OBJ_DIR)/lal_dipole_lj.o: $(ALL_H) lal_dipole_lj.h lal_dipole_lj.cpp $(OBJ_DIR)/dipole_lj_cubin.h $(OBJ_DIR)/lal_base_dipole.o
 	$(CUDR) -o $@ -c lal_dipole_lj.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_dipole_lj_ext.o: $(ALL_H) lal_dipole_lj.h lal_dipole_lj_ext.cpp lal_base_dipole.h
 	$(CUDR) -o $@ -c lal_dipole_lj_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/dipole_lj_sf.cubin: lal_dipole_lj_sf.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_dipole_lj_sf.cu
 
 $(OBJ_DIR)/dipole_lj_sf_cubin.h: $(OBJ_DIR)/dipole_lj_sf.cubin $(OBJ_DIR)/dipole_lj_sf.cubin
 	$(BIN2C) -c -n dipole_lj_sf $(OBJ_DIR)/dipole_lj_sf.cubin > $(OBJ_DIR)/dipole_lj_sf_cubin.h
 
 $(OBJ_DIR)/lal_dipole_lj_sf.o: $(ALL_H) lal_dipole_lj_sf.h lal_dipole_lj_sf.cpp $(OBJ_DIR)/dipole_lj_sf_cubin.h $(OBJ_DIR)/lal_base_dipole.o
 	$(CUDR) -o $@ -c lal_dipole_lj_sf.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_dipole_lj_sf_ext.o: $(ALL_H) lal_dipole_lj_sf.h lal_dipole_lj_sf_ext.cpp lal_base_dipole.h
 	$(CUDR) -o $@ -c lal_dipole_lj_sf_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/colloid.cubin: lal_colloid.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_colloid.cu
 
 $(OBJ_DIR)/colloid_cubin.h: $(OBJ_DIR)/colloid.cubin $(OBJ_DIR)/colloid.cubin
 	$(BIN2C) -c -n colloid $(OBJ_DIR)/colloid.cubin > $(OBJ_DIR)/colloid_cubin.h
 
 $(OBJ_DIR)/lal_colloid.o: $(ALL_H) lal_colloid.h lal_colloid.cpp $(OBJ_DIR)/colloid_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_colloid.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_colloid_ext.o: $(ALL_H) lal_colloid.h lal_colloid_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_colloid_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/gauss.cubin: lal_gauss.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_gauss.cu
 
 $(OBJ_DIR)/gauss_cubin.h: $(OBJ_DIR)/gauss.cubin $(OBJ_DIR)/gauss.cubin
 	$(BIN2C) -c -n gauss $(OBJ_DIR)/gauss.cubin > $(OBJ_DIR)/gauss_cubin.h
 
 $(OBJ_DIR)/lal_gauss.o: $(ALL_H) lal_gauss.h lal_gauss.cpp $(OBJ_DIR)/gauss_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_gauss.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_gauss_ext.o: $(ALL_H) lal_gauss.h lal_gauss_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_gauss_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/yukawa_colloid.cubin: lal_yukawa_colloid.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_yukawa_colloid.cu
 
 $(OBJ_DIR)/yukawa_colloid_cubin.h: $(OBJ_DIR)/yukawa_colloid.cubin $(OBJ_DIR)/yukawa_colloid.cubin
 	$(BIN2C) -c -n yukawa_colloid $(OBJ_DIR)/yukawa_colloid.cubin > $(OBJ_DIR)/yukawa_colloid_cubin.h
 
 $(OBJ_DIR)/lal_yukawa_colloid.o: $(ALL_H) lal_yukawa_colloid.h lal_yukawa_colloid.cpp $(OBJ_DIR)/yukawa_colloid_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c lal_yukawa_colloid.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_yukawa_colloid_ext.o: $(ALL_H) lal_yukawa_colloid.h lal_yukawa_colloid_ext.cpp lal_base_atomic.h
 	$(CUDR) -o $@ -c lal_yukawa_colloid_ext.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lj_coul_debye.cubin: lal_lj_coul_debye.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_coul_debye.cu
 
 $(OBJ_DIR)/lj_coul_debye_cubin.h: $(OBJ_DIR)/lj_coul_debye.cubin $(OBJ_DIR)/lj_coul_debye.cubin
 	$(BIN2C) -c -n lj_coul_debye $(OBJ_DIR)/lj_coul_debye.cubin > $(OBJ_DIR)/lj_coul_debye_cubin.h
 
 $(OBJ_DIR)/lal_lj_coul_debye.o: $(ALL_H) lal_lj_coul_debye.h lal_lj_coul_debye.cpp $(OBJ_DIR)/lj_coul_debye_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c lal_lj_coul_debye.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_coul_debye_ext.o: $(ALL_H) lal_lj_coul_debye.h lal_lj_coul_debye_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_lj_coul_debye_ext.cpp -I$(OBJ_DIR)
 
 # coul_dsf: compile the kernel to a cubin, convert it to a C header with
 # bin2c (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/coul_dsf.cubin: lal_coul_dsf.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/coul_dsf_cubin.h: $(OBJ_DIR)/coul_dsf.cubin
 	$(BIN2C) -c -n coul_dsf $< > $@
 
 $(OBJ_DIR)/lal_coul_dsf.o: lal_coul_dsf.cpp lal_coul_dsf.h $(ALL_H) $(OBJ_DIR)/coul_dsf_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_coul_dsf_ext.o: lal_coul_dsf_ext.cpp lal_coul_dsf.h lal_base_charge.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # sw: compile the kernel to a cubin, convert it to a C header with bin2c
 # (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/sw.cubin: lal_sw.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/sw_cubin.h: $(OBJ_DIR)/sw.cubin
 	$(BIN2C) -c -n sw $< > $@
 
 $(OBJ_DIR)/lal_sw.o: lal_sw.cpp lal_sw.h $(ALL_H) $(OBJ_DIR)/sw_cubin.h $(OBJ_DIR)/lal_base_three.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_sw_ext.o: lal_sw_ext.cpp lal_sw.h lal_base_three.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # beck: compile the kernel to a cubin, convert it to a C header with bin2c
 # (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/beck.cubin: lal_beck.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/beck_cubin.h: $(OBJ_DIR)/beck.cubin
 	$(BIN2C) -c -n beck $< > $@
 
 $(OBJ_DIR)/lal_beck.o: lal_beck.cpp lal_beck.h $(ALL_H) $(OBJ_DIR)/beck_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_beck_ext.o: lal_beck_ext.cpp lal_beck.h lal_base_atomic.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # mie: compile the kernel to a cubin, convert it to a C header with bin2c
 # (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/mie.cubin: lal_mie.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/mie_cubin.h: $(OBJ_DIR)/mie.cubin
 	$(BIN2C) -c -n mie $< > $@
 
 $(OBJ_DIR)/lal_mie.o: lal_mie.cpp lal_mie.h $(ALL_H) $(OBJ_DIR)/mie_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_mie_ext.o: lal_mie_ext.cpp lal_mie.h lal_base_atomic.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # soft: compile the kernel to a cubin, convert it to a C header with bin2c
 # (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/soft.cubin: lal_soft.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/soft_cubin.h: $(OBJ_DIR)/soft.cubin
 	$(BIN2C) -c -n soft $< > $@
 
 $(OBJ_DIR)/lal_soft.o: lal_soft.cpp lal_soft.h $(ALL_H) $(OBJ_DIR)/soft_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_soft_ext.o: lal_soft_ext.cpp lal_soft.h lal_base_atomic.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # lj_coul_msm: compile the kernel to a cubin, convert it to a C header with
 # bin2c (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/lj_coul_msm.cubin: lal_lj_coul_msm.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/lj_coul_msm_cubin.h: $(OBJ_DIR)/lj_coul_msm.cubin
 	$(BIN2C) -c -n lj_coul_msm $< > $@
 
 $(OBJ_DIR)/lal_lj_coul_msm.o: lal_lj_coul_msm.cpp lal_lj_coul_msm.h $(ALL_H) $(OBJ_DIR)/lj_coul_msm_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_coul_msm_ext.o: lal_lj_coul_msm_ext.cpp lal_lj_coul_msm.h lal_base_charge.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # lj_gromacs: compile the kernel to a cubin, convert it to a C header with
 # bin2c (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/lj_gromacs.cubin: lal_lj_gromacs.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/lj_gromacs_cubin.h: $(OBJ_DIR)/lj_gromacs.cubin
 	$(BIN2C) -c -n lj_gromacs $< > $@
 
 $(OBJ_DIR)/lal_lj_gromacs.o: lal_lj_gromacs.cpp lal_lj_gromacs.h $(ALL_H) $(OBJ_DIR)/lj_gromacs_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_gromacs_ext.o: lal_lj_gromacs_ext.cpp lal_lj_gromacs.h lal_base_atomic.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # dpd: compile the kernel to a cubin, convert it to a C header with bin2c
 # (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/dpd.cubin: lal_dpd.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/dpd_cubin.h: $(OBJ_DIR)/dpd.cubin
 	$(BIN2C) -c -n dpd $< > $@
 
 $(OBJ_DIR)/lal_dpd.o: lal_dpd.cpp lal_dpd.h $(ALL_H) $(OBJ_DIR)/dpd_cubin.h $(OBJ_DIR)/lal_base_dpd.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_dpd_ext.o: lal_dpd_ext.cpp lal_dpd.h lal_base_dpd.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # tersoff: compile the kernel to a cubin (also tracking lal_tersoff_extra.h),
 # convert it to a C header with bin2c, then build the object and its _ext object.
 $(OBJ_DIR)/tersoff.cubin: lal_tersoff.cu lal_precision.h lal_tersoff_extra.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/tersoff_cubin.h: $(OBJ_DIR)/tersoff.cubin
 	$(BIN2C) -c -n tersoff $< > $@
 
 $(OBJ_DIR)/lal_tersoff.o: lal_tersoff.cpp lal_tersoff.h $(ALL_H) $(OBJ_DIR)/tersoff_cubin.h $(OBJ_DIR)/lal_base_three.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_tersoff_ext.o: lal_tersoff_ext.cpp lal_tersoff.h lal_base_three.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # coul: compile the kernel to a cubin, convert it to a C header with bin2c
 # (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/coul_cubin.h: $(OBJ_DIR)/coul.cubin
 	$(BIN2C) -c -n coul $< > $@
 
 $(OBJ_DIR)/lal_coul.o: lal_coul.cpp lal_coul.h $(ALL_H) $(OBJ_DIR)/coul_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_coul_ext.o: lal_coul_ext.cpp lal_coul.h lal_base_charge.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # coul_debye: compile the kernel to a cubin, convert it to a C header with
 # bin2c (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/coul_debye.cubin: lal_coul_debye.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/coul_debye_cubin.h: $(OBJ_DIR)/coul_debye.cubin
 	$(BIN2C) -c -n coul_debye $< > $@
 
 $(OBJ_DIR)/lal_coul_debye.o: lal_coul_debye.cpp lal_coul_debye.h $(ALL_H) $(OBJ_DIR)/coul_debye_cubin.h $(OBJ_DIR)/lal_base_charge.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_coul_debye_ext.o: lal_coul_debye_ext.cpp lal_coul_debye.h lal_base_charge.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # zbl: compile the kernel to a cubin, convert it to a C header with bin2c
 # (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/zbl.cubin: lal_zbl.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/zbl_cubin.h: $(OBJ_DIR)/zbl.cubin
 	$(BIN2C) -c -n zbl $< > $@
 
 $(OBJ_DIR)/lal_zbl.o: lal_zbl.cpp lal_zbl.h $(ALL_H) $(OBJ_DIR)/zbl_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_zbl_ext.o: lal_zbl_ext.cpp lal_zbl.h lal_base_atomic.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # lj_cubic: compile the kernel to a cubin, convert it to a C header with
 # bin2c (stdout redirected to the header), then build the object and its _ext object.
 $(OBJ_DIR)/lj_cubic.cubin: lal_lj_cubic.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ $<
 
 # The cubin is listed once (it was duplicated before); $< is the cubin, $@ the header.
 $(OBJ_DIR)/lj_cubic_cubin.h: $(OBJ_DIR)/lj_cubic.cubin
 	$(BIN2C) -c -n lj_cubic $< > $@
 
 $(OBJ_DIR)/lal_lj_cubic.o: lal_lj_cubic.cpp lal_lj_cubic.h $(ALL_H) $(OBJ_DIR)/lj_cubic_cubin.h $(OBJ_DIR)/lal_base_atomic.o
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_cubic_ext.o: lal_lj_cubic_ext.cpp lal_lj_cubic.h lal_base_atomic.h $(ALL_H)
 	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)
 
 # nvc_get_devices: built in one step from Geryon's ucl_get_devices.cpp ($<)
 # with UCL_CUDADR defined, linked against $(CUDA_LIB) and -lcuda.
 $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
 	$(CUDR) -o $@ $< -DUCL_CUDADR $(CUDA_LIB) -lcuda
 
 # Archive all library objects (plus the CUDPP objects when CUDPP_OPT is
 # defined) into the static library, then copy $(EXTRAMAKE) to Makefile.lammps.
 $(GPU_LIB): $(OBJS) $(CUDPP)
 	$(AR) -crusv $@ $(OBJS) $(CUDPP)
 	@cp $(EXTRAMAKE) Makefile.lammps
 
 # Housekeeping targets.  They name actions, not files, so declare them phony;
 # otherwise a stray file called "clean" (etc.) would make the target a no-op.
 .PHONY: clean veryclean cleanlib
 
 # Remove the executables, the library, all objects (including the CUDPP
 # objects), the files listed in $(CBNS) and any *.linkinfo files.
 clean:
 	rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CBNS) *.linkinfo
 
 # Everything `clean` removes, plus editor backup files (*~).
 veryclean: clean
 	rm -rf *~ *.linkinfo
 
 # Same as `clean` except that the CUDPP objects are kept.
 cleanlib:
 	rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CBNS) *.linkinfo
diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile
index 8f3f4f017..d7bae0f49 100644
--- a/lib/gpu/Opencl.makefile
+++ b/lib/gpu/Opencl.makefile
@@ -1,556 +1,563 @@
 # Compile command for every OpenCL-backend object; USE_OPENCL is always defined.
 # Kept recursive (=) because OCL_CPP/OCL_PREC/OCL_TUNE are not set in this file.
 OCL  = $(OCL_CPP) $(OCL_PREC) $(OCL_TUNE) -DUSE_OPENCL
 OCL_LIB = $(LIB_DIR)/libgpu.a
 # Headers for Geryon.  Simply expanded (:=) so each glob is evaluated once at
 # parse time instead of on every expansion in the prerequisite lists.
 UCL_H  := $(wildcard ./geryon/ucl*.h)
 OCL_H  := $(wildcard ./geryon/ocl*.h) $(UCL_H)
 # Headers for Pair Stuff
 PAIR_H  = lal_atom.h lal_answer.h lal_neighbor_shared.h \
           lal_neighbor.h lal_precision.h lal_device.h \
           lal_balance.h lal_pppm.h
 # Headers for Preprocessor/Auxiliary Functions
 PRE1_H = lal_preprocessor.h lal_aux_fun1.h
 
 # Every header a host-side object is assumed to depend on.
 ALL_H = $(OCL_H) $(PAIR_H)
 
 # Executables built by `all` in addition to the library.
 EXECS = $(BIN_DIR)/ocl_get_devices
 # Host-side object files of the library.  Most styles contribute a pair
 # (lal_<style>.o and lal_<style>_ext.o); lal_eam_fs_ext.o and
 # lal_eam_alloy_ext.o are _ext objects only, with no object of their own.
 OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
        $(OBJ_DIR)/lal_neighbor_shared.o $(OBJ_DIR)/lal_neighbor.o \
        $(OBJ_DIR)/lal_device.o $(OBJ_DIR)/lal_base_atomic.o \
        $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_base_ellipsoid.o \
        $(OBJ_DIR)/lal_base_dipole.o $(OBJ_DIR)/lal_base_three.o \
        $(OBJ_DIR)/lal_base_dpd.o \
        $(OBJ_DIR)/lal_pppm.o $(OBJ_DIR)/lal_pppm_ext.o \
        $(OBJ_DIR)/lal_gayberne.o $(OBJ_DIR)/lal_gayberne_ext.o \
        $(OBJ_DIR)/lal_re_squared.o $(OBJ_DIR)/lal_re_squared_ext.o \
        $(OBJ_DIR)/lal_lj.o $(OBJ_DIR)/lal_lj_ext.o \
        $(OBJ_DIR)/lal_lj96.o $(OBJ_DIR)/lal_lj96_ext.o \
        $(OBJ_DIR)/lal_lj_expand.o $(OBJ_DIR)/lal_lj_expand_ext.o \
        $(OBJ_DIR)/lal_lj_coul.o $(OBJ_DIR)/lal_lj_coul_ext.o \
        $(OBJ_DIR)/lal_lj_coul_long.o $(OBJ_DIR)/lal_lj_coul_long_ext.o \
        $(OBJ_DIR)/lal_lj_dsf.o $(OBJ_DIR)/lal_lj_dsf_ext.o \
        $(OBJ_DIR)/lal_lj_class2_long.o $(OBJ_DIR)/lal_lj_class2_long_ext.o \
        $(OBJ_DIR)/lal_coul_long.o $(OBJ_DIR)/lal_coul_long_ext.o \
        $(OBJ_DIR)/lal_morse.o $(OBJ_DIR)/lal_morse_ext.o \
        $(OBJ_DIR)/lal_charmm_long.o $(OBJ_DIR)/lal_charmm_long_ext.o \
        $(OBJ_DIR)/lal_cg_cmm.o $(OBJ_DIR)/lal_cg_cmm_ext.o \
        $(OBJ_DIR)/lal_cg_cmm_long.o $(OBJ_DIR)/lal_cg_cmm_long_ext.o \
        $(OBJ_DIR)/lal_eam.o $(OBJ_DIR)/lal_eam_ext.o \
+       $(OBJ_DIR)/lal_eam_fs_ext.o $(OBJ_DIR)/lal_eam_alloy_ext.o \
        $(OBJ_DIR)/lal_buck.o $(OBJ_DIR)/lal_buck_ext.o \
        $(OBJ_DIR)/lal_buck_coul.o $(OBJ_DIR)/lal_buck_coul_ext.o \
        $(OBJ_DIR)/lal_buck_coul_long.o $(OBJ_DIR)/lal_buck_coul_long_ext.o \
        $(OBJ_DIR)/lal_table.o $(OBJ_DIR)/lal_table_ext.o \
        $(OBJ_DIR)/lal_yukawa.o $(OBJ_DIR)/lal_yukawa_ext.o \
        $(OBJ_DIR)/lal_born.o $(OBJ_DIR)/lal_born_ext.o \
        $(OBJ_DIR)/lal_born_coul_wolf.o $(OBJ_DIR)/lal_born_coul_wolf_ext.o \
        $(OBJ_DIR)/lal_born_coul_long.o $(OBJ_DIR)/lal_born_coul_long_ext.o \
        $(OBJ_DIR)/lal_dipole_lj.o $(OBJ_DIR)/lal_dipole_lj_ext.o \
        $(OBJ_DIR)/lal_dipole_lj_sf.o $(OBJ_DIR)/lal_dipole_lj_sf_ext.o \
        $(OBJ_DIR)/lal_colloid.o $(OBJ_DIR)/lal_colloid_ext.o \
        $(OBJ_DIR)/lal_gauss.o $(OBJ_DIR)/lal_gauss_ext.o \
        $(OBJ_DIR)/lal_yukawa_colloid.o $(OBJ_DIR)/lal_yukawa_colloid_ext.o \
        $(OBJ_DIR)/lal_lj_coul_debye.o $(OBJ_DIR)/lal_lj_coul_debye_ext.o \
        $(OBJ_DIR)/lal_coul_dsf.o $(OBJ_DIR)/lal_coul_dsf_ext.o \
        $(OBJ_DIR)/lal_sw.o $(OBJ_DIR)/lal_sw_ext.o \
        $(OBJ_DIR)/lal_beck.o $(OBJ_DIR)/lal_beck_ext.o \
        $(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \
        $(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
        $(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
        $(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
        $(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
        $(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
        $(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
        $(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
        $(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
        $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o
 
 # Generated kernel headers: $(OBJ_DIR)/<name>_cl.h for each name listed,
 # in the same order as before.
 # NOTE(review): lj_gauss and dzugutov have no matching entries in OBJS - confirm.
 KERS = $(addprefix $(OBJ_DIR)/,$(addsuffix _cl.h, \
          device atom neighbor_cpu pppm ellipsoid_nbor \
          gayberne gayberne_lj re_squared re_squared_lj \
          lj lj96 lj_expand lj_coul lj_coul_long lj_dsf lj_class2_long \
          coul_long morse charmm_long cg_cmm cg_cmm_long neighbor_gpu \
          eam buck buck_coul buck_coul_long table yukawa \
          born born_coul_wolf born_coul_long dipole_lj dipole_lj_sf \
          colloid gauss yukawa_colloid lj_coul_debye coul_dsf \
          sw beck mie soft lj_coul_msm lj_gromacs dpd \
          lj_gauss dzugutov tersoff coul coul_debye zbl lj_cubic))
 
 
 # Holds the same value as EXECS.
 OCL_EXECS = $(BIN_DIR)/ocl_get_devices
 
 # Default goal: make sure the object directory exists, then build the
 # library and the executables.
 all: $(OBJ_DIR) $(OCL_LIB) $(EXECS)
 
 # `all` names an action, not a file.
 .PHONY: all
 
 $(OBJ_DIR):
 	mkdir -p $@
 
 # Every object and generated kernel header is written into $(OBJ_DIR), so the
 # directory must exist before any of them is built.  Listing it only on `all`
 # does not guarantee that under `make -j`; an order-only prerequisite does,
 # without forcing rebuilds when the directory's timestamp changes.
 $(OBJS) $(KERS): | $(OBJ_DIR)
 
 # atom / answer: generate atom_cl.h from the preprocessor header and kernel
 # source with geryon/file_to_cstr.sh, then compile the two objects.
 $(OBJ_DIR)/atom_cl.h: lal_atom.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh atom lal_preprocessor.h $< $@
 
 $(OBJ_DIR)/lal_atom.o: lal_atom.cpp lal_atom.h $(OCL_H) $(OBJ_DIR)/atom_cl.h
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_answer.o: lal_answer.cpp lal_answer.h $(OCL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # Neighbor code: two generated kernel headers (cpu and gpu variants), which
 # lal_neighbor_shared.o depends on, plus the lal_neighbor.o object.
 $(OBJ_DIR)/neighbor_cpu_cl.h: lal_neighbor_cpu.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh neighbor_cpu lal_preprocessor.h $< $@
 
 $(OBJ_DIR)/neighbor_gpu_cl.h: lal_neighbor_gpu.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh neighbor_gpu lal_preprocessor.h $< $@
 
 $(OBJ_DIR)/lal_neighbor_shared.o: lal_neighbor_shared.cpp lal_neighbor_shared.h $(OCL_H) $(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/neighbor_gpu_cl.h
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_neighbor.o: lal_neighbor.cpp lal_neighbor.h $(OCL_H) lal_neighbor_shared.h
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # device: generate device_cl.h with geryon/file_to_cstr.sh, then compile
 # lal_device.o against every header in $(ALL_H).
 $(OBJ_DIR)/device_cl.h: lal_device.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh device lal_preprocessor.h $< $@
 
 $(OBJ_DIR)/lal_device.o: lal_device.cpp lal_device.h $(ALL_H) $(OBJ_DIR)/device_cl.h
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # lal_base_* objects that the per-style objects list as prerequisites.  Only
 # the ellipsoid and dipole variants pass -I$(OBJ_DIR); ellipsoid also depends
 # on the generated ellipsoid_nbor_cl.h.
 $(OBJ_DIR)/lal_base_atomic.o: lal_base_atomic.cpp lal_base_atomic.h $(OCL_H)
 	$(OCL) -o $@ -c $<
 
 $(OBJ_DIR)/lal_base_charge.o: lal_base_charge.cpp lal_base_charge.h $(OCL_H)
 	$(OCL) -o $@ -c $<
 
 $(OBJ_DIR)/lal_base_ellipsoid.o: lal_base_ellipsoid.cpp lal_base_ellipsoid.h $(OCL_H) $(OBJ_DIR)/ellipsoid_nbor_cl.h
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_base_dipole.o: lal_base_dipole.cpp lal_base_dipole.h $(OCL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_base_three.o: lal_base_three.cpp lal_base_three.h $(OCL_H)
 	$(OCL) -o $@ -c $<
 
 $(OBJ_DIR)/lal_base_dpd.o: lal_base_dpd.cpp lal_base_dpd.h $(OCL_H)
 	$(OCL) -o $@ -c $<
 
 # pppm: generate pppm_cl.h from the preprocessor header and kernel source
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/pppm_cl.h: lal_pppm.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh pppm lal_preprocessor.h $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_pppm.o: lal_pppm.cpp lal_pppm.h $(ALL_H) $(OBJ_DIR)/pppm_cl.h
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_pppm_ext.o: lal_pppm_ext.cpp lal_pppm.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # Ellipsoid neighbor kernel header and the gayberne style.  The gayberne
 # kernels are generated from the preprocessor, aux-function and
 # ellipsoid-extra headers followed by the kernel source ($<).
 $(OBJ_DIR)/ellipsoid_nbor_cl.h: lal_ellipsoid_nbor.cu lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh ellipsoid_nbor lal_preprocessor.h $< $@
 
 $(OBJ_DIR)/gayberne_cl.h: lal_gayberne.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh gayberne lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h $< $@;
 
 $(OBJ_DIR)/gayberne_lj_cl.h: lal_gayberne_lj.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh gayberne_lj lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h $< $@;
 
 $(OBJ_DIR)/lal_gayberne.o: lal_gayberne.cpp lal_gayberne.h $(ALL_H) $(OBJ_DIR)/gayberne_cl.h $(OBJ_DIR)/gayberne_lj_cl.h $(OBJ_DIR)/lal_base_ellipsoid.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # Depends on the gayberne object itself, not on lal_gayberne.h.
 $(OBJ_DIR)/lal_gayberne_ext.o: lal_gayberne_ext.cpp $(ALL_H) $(OBJ_DIR)/lal_gayberne.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # re_squared: two kernel headers generated from the preprocessor,
 # aux-function and ellipsoid-extra headers plus the kernel source ($<),
 # then the object and its _ext object.
 $(OBJ_DIR)/re_squared_cl.h: lal_re_squared.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh re_squared lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h $< $@;
 
 $(OBJ_DIR)/re_squared_lj_cl.h: lal_re_squared_lj.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h
 	$(BSH) ./geryon/file_to_cstr.sh re_squared_lj lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h $< $@;
 
 $(OBJ_DIR)/lal_re_squared.o: lal_re_squared.cpp lal_re_squared.h $(ALL_H) $(OBJ_DIR)/re_squared_cl.h $(OBJ_DIR)/re_squared_lj_cl.h $(OBJ_DIR)/lal_base_ellipsoid.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # Depends on the re_squared object itself, not on lal_re_squared.h.
 $(OBJ_DIR)/lal_re_squared_ext.o: lal_re_squared_ext.cpp $(ALL_H) $(OBJ_DIR)/lal_re_squared.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # lj: generate lj_cl.h from $(PRE1_H) and the kernel source ($<) with
 # geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/lj_cl.h: lal_lj.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh lj $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_lj.o: lal_lj.cpp lal_lj.h $(ALL_H) $(OBJ_DIR)/lj_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_ext.o: lal_lj_ext.cpp lal_lj.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # lj_coul: generate lj_coul_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/lj_coul_cl.h: lal_lj_coul.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh lj_coul $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_lj_coul.o: lal_lj_coul.cpp lal_lj_coul.h $(ALL_H) $(OBJ_DIR)/lj_coul_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_coul_ext.o: lal_lj_coul_ext.cpp lal_lj_coul.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # lj_coul_long: generate lj_coul_long_cl.h from $(PRE1_H) and the kernel source
 # ($<) with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/lj_coul_long_cl.h: lal_lj_coul_long.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh lj_coul_long $(PRE1_H) $< $@;
 
 $(OBJ_DIR)/lal_lj_coul_long.o: lal_lj_coul_long.cpp lal_lj_coul_long.h $(ALL_H) $(OBJ_DIR)/lj_coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_coul_long_ext.o: lal_lj_coul_long_ext.cpp lal_lj_coul_long.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # lj_dsf: generate lj_dsf_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/lj_dsf_cl.h: lal_lj_dsf.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh lj_dsf $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_lj_dsf.o: lal_lj_dsf.cpp lal_lj_dsf.h $(ALL_H) $(OBJ_DIR)/lj_dsf_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_dsf_ext.o: lal_lj_dsf_ext.cpp lal_lj_dsf.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # lj_class2_long: generate lj_class2_long_cl.h from $(PRE1_H) and the kernel source
 # ($<) with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/lj_class2_long_cl.h: lal_lj_class2_long.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh lj_class2_long $(PRE1_H) $< $@;
 
 $(OBJ_DIR)/lal_lj_class2_long.o: lal_lj_class2_long.cpp lal_lj_class2_long.h $(ALL_H) $(OBJ_DIR)/lj_class2_long_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_class2_long_ext.o: lal_lj_class2_long_ext.cpp lal_lj_class2_long.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # coul_long: generate coul_long_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/coul_long_cl.h: lal_coul_long.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh coul_long $(PRE1_H) $< $@;
 
 $(OBJ_DIR)/lal_coul_long.o: lal_coul_long.cpp lal_coul_long.h $(ALL_H) $(OBJ_DIR)/coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_coul_long_ext.o: lal_coul_long_ext.cpp lal_coul_long.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # morse: generate morse_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/morse_cl.h: lal_morse.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh morse $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_morse.o: lal_morse.cpp lal_morse.h $(ALL_H) $(OBJ_DIR)/morse_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_morse_ext.o: lal_morse_ext.cpp lal_morse.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # charmm_long: generate charmm_long_cl.h from $(PRE1_H) and the kernel source
 # ($<) with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/charmm_long_cl.h: lal_charmm_long.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh charmm_long $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_charmm_long.o: lal_charmm_long.cpp lal_charmm_long.h $(ALL_H) $(OBJ_DIR)/charmm_long_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_charmm_long_ext.o: lal_charmm_long_ext.cpp lal_charmm_long.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # lj96: generate lj96_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/lj96_cl.h: lal_lj96.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh lj96 $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_lj96.o: lal_lj96.cpp lal_lj96.h $(ALL_H) $(OBJ_DIR)/lj96_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj96_ext.o: lal_lj96_ext.cpp lal_lj96.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # lj_expand: generate lj_expand_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/lj_expand_cl.h: lal_lj_expand.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh lj_expand $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_lj_expand.o: lal_lj_expand.cpp lal_lj_expand.h $(ALL_H) $(OBJ_DIR)/lj_expand_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_expand_ext.o: lal_lj_expand_ext.cpp lal_lj_expand.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # cg_cmm: generate cg_cmm_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/cg_cmm_cl.h: lal_cg_cmm.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh cg_cmm $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_cg_cmm.o: lal_cg_cmm.cpp lal_cg_cmm.h $(ALL_H) $(OBJ_DIR)/cg_cmm_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_cg_cmm_ext.o: lal_cg_cmm_ext.cpp lal_cg_cmm.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # cg_cmm_long: generate cg_cmm_long_cl.h from $(PRE1_H) and the kernel source
 # ($<) with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/cg_cmm_long_cl.h: lal_cg_cmm_long.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh cg_cmm_long $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 # NOTE(review): this rule depends on lal_base_atomic.o while the _ext rule
 # below lists lal_base_charge.h - one of the two is probably wrong; confirm
 # against lal_cg_cmm_long.h before changing either.
 $(OBJ_DIR)/lal_cg_cmm_long.o: lal_cg_cmm_long.cpp lal_cg_cmm_long.h $(ALL_H) $(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_cg_cmm_long_ext.o: lal_cg_cmm_long_ext.cpp lal_cg_cmm_long.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # eam: generate eam_cl.h from $(PRE1_H) and the kernel source ($<) with
 # geryon/file_to_cstr.sh, then build the object.
 $(OBJ_DIR)/eam_cl.h: lal_eam.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh eam $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_eam.o: lal_eam.cpp lal_eam.h $(ALL_H) $(OBJ_DIR)/eam_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # EAM _ext objects.  All three rules list lal_eam.h and lal_base_atomic.h as
 # prerequisites; the change below replaces the lal_base_charge.h prerequisite
 # of lal_eam_ext.o and adds the fs and alloy _ext objects.
-$(OBJ_DIR)/lal_eam_ext.o: $(ALL_H) lal_eam.h lal_eam_ext.cpp lal_base_charge.h
+$(OBJ_DIR)/lal_eam_ext.o: $(ALL_H) lal_eam.h lal_eam_ext.cpp lal_base_atomic.h
 	$(OCL) -o $@ -c lal_eam_ext.cpp -I$(OBJ_DIR)
 
+$(OBJ_DIR)/lal_eam_fs_ext.o: $(ALL_H) lal_eam.h lal_eam_fs_ext.cpp lal_base_atomic.h
+	$(OCL) -o $@ -c lal_eam_fs_ext.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_eam_alloy_ext.o: $(ALL_H) lal_eam.h lal_eam_alloy_ext.cpp lal_base_atomic.h
+	$(OCL) -o $@ -c lal_eam_alloy_ext.cpp -I$(OBJ_DIR)
+
 # buck: generate buck_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/buck_cl.h: lal_buck.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh buck $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_buck.o: lal_buck.cpp lal_buck.h $(ALL_H) $(OBJ_DIR)/buck_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_buck_ext.o: lal_buck_ext.cpp lal_buck.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # buck_coul: generate buck_coul_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/buck_coul_cl.h: lal_buck_coul.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh buck_coul $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_buck_coul.o: lal_buck_coul.cpp lal_buck_coul.h $(ALL_H) $(OBJ_DIR)/buck_coul_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_buck_coul_ext.o: lal_buck_coul_ext.cpp lal_buck_coul.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # buck_coul_long: generate buck_coul_long_cl.h from $(PRE1_H) and the kernel source
 # ($<) with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/buck_coul_long_cl.h: lal_buck_coul_long.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh buck_coul_long $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_buck_coul_long.o: lal_buck_coul_long.cpp lal_buck_coul_long.h $(ALL_H) $(OBJ_DIR)/buck_coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_buck_coul_long_ext.o: lal_buck_coul_long_ext.cpp lal_buck_coul_long.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # table: generate table_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/table_cl.h: lal_table.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh table $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_table.o: lal_table.cpp lal_table.h $(ALL_H) $(OBJ_DIR)/table_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_table_ext.o: lal_table_ext.cpp lal_table.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # yukawa: generate yukawa_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/yukawa_cl.h: lal_yukawa.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh yukawa $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_yukawa.o: lal_yukawa.cpp lal_yukawa.h $(ALL_H) $(OBJ_DIR)/yukawa_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_yukawa_ext.o: lal_yukawa_ext.cpp lal_yukawa.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # born: generate born_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/born_cl.h: lal_born.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh born $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_born.o: lal_born.cpp lal_born.h $(ALL_H) $(OBJ_DIR)/born_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_born_ext.o: lal_born_ext.cpp lal_born.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # born_coul_wolf: generate born_coul_wolf_cl.h from $(PRE1_H) and the kernel source
 # ($<) with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/born_coul_wolf_cl.h: lal_born_coul_wolf.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh born_coul_wolf $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_born_coul_wolf.o: lal_born_coul_wolf.cpp lal_born_coul_wolf.h $(ALL_H) $(OBJ_DIR)/born_coul_wolf_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_born_coul_wolf_ext.o: lal_born_coul_wolf_ext.cpp lal_born_coul_wolf.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # born_coul_long: generate born_coul_long_cl.h from $(PRE1_H) and the kernel source
 # ($<) with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/born_coul_long_cl.h: lal_born_coul_long.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh born_coul_long $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_born_coul_long.o: lal_born_coul_long.cpp lal_born_coul_long.h $(ALL_H) $(OBJ_DIR)/born_coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_born_coul_long_ext.o: lal_born_coul_long_ext.cpp lal_born_coul_long.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # dipole_lj: generate dipole_lj_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/dipole_lj_cl.h: lal_dipole_lj.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh dipole_lj $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_dipole_lj.o: lal_dipole_lj.cpp lal_dipole_lj.h $(ALL_H) $(OBJ_DIR)/dipole_lj_cl.h $(OBJ_DIR)/lal_base_dipole.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_dipole_lj_ext.o: lal_dipole_lj_ext.cpp lal_dipole_lj.h lal_base_dipole.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # dipole_lj_sf: generate dipole_lj_sf_cl.h from $(PRE1_H) and the kernel source
 # ($<) with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/dipole_lj_sf_cl.h: lal_dipole_lj_sf.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh dipole_lj_sf $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_dipole_lj_sf.o: lal_dipole_lj_sf.cpp lal_dipole_lj_sf.h $(ALL_H) $(OBJ_DIR)/dipole_lj_sf_cl.h $(OBJ_DIR)/lal_base_dipole.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_dipole_lj_sf_ext.o: lal_dipole_lj_sf_ext.cpp lal_dipole_lj_sf.h lal_base_dipole.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # colloid: generate colloid_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/colloid_cl.h: lal_colloid.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh colloid $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_colloid.o: lal_colloid.cpp lal_colloid.h $(ALL_H) $(OBJ_DIR)/colloid_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_colloid_ext.o: lal_colloid_ext.cpp lal_colloid.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # gauss: generate gauss_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/gauss_cl.h: lal_gauss.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh gauss $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_gauss.o: lal_gauss.cpp lal_gauss.h $(ALL_H) $(OBJ_DIR)/gauss_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_gauss_ext.o: lal_gauss_ext.cpp lal_gauss.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # yukawa_colloid: generate yukawa_colloid_cl.h from $(PRE1_H) and the kernel source
 # ($<) with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/yukawa_colloid_cl.h: lal_yukawa_colloid.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh yukawa_colloid $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_yukawa_colloid.o: lal_yukawa_colloid.cpp lal_yukawa_colloid.h $(ALL_H) $(OBJ_DIR)/yukawa_colloid_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_yukawa_colloid_ext.o: lal_yukawa_colloid_ext.cpp lal_yukawa_colloid.h lal_base_atomic.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # lj_coul_debye: generate lj_coul_debye_cl.h from $(PRE1_H) and the kernel source
 # ($<) with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/lj_coul_debye_cl.h: lal_lj_coul_debye.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh lj_coul_debye $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_lj_coul_debye.o: lal_lj_coul_debye.cpp lal_lj_coul_debye.h $(ALL_H) $(OBJ_DIR)/lj_coul_debye_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_coul_debye_ext.o: lal_lj_coul_debye_ext.cpp lal_lj_coul_debye.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # coul_dsf: generate coul_dsf_cl.h from $(PRE1_H) and the kernel source ($<)
 # with geryon/file_to_cstr.sh, then build the object and its _ext object.
 $(OBJ_DIR)/coul_dsf_cl.h: lal_coul_dsf.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh coul_dsf $(PRE1_H) $< $@
 
 # The generated header is listed once (it was duplicated before).
 $(OBJ_DIR)/lal_coul_dsf.o: lal_coul_dsf.cpp lal_coul_dsf.h $(ALL_H) $(OBJ_DIR)/coul_dsf_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_coul_dsf_ext.o: lal_coul_dsf_ext.cpp lal_coul_dsf.h lal_base_charge.h $(ALL_H)
 	$(OCL) -o $@ -c $< -I$(OBJ_DIR)
 
 # sw: generate sw_cl.h from the kernel source with file_to_cstr.sh,
 # then compile lal_sw.o and lal_sw_ext.o.
 $(OBJ_DIR)/sw_cl.h: lal_sw.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh sw $(PRE1_H) lal_sw.cu $@
 
 $(OBJ_DIR)/lal_sw.o: $(ALL_H) lal_sw.h lal_sw.cpp $(OBJ_DIR)/sw_cl.h $(OBJ_DIR)/lal_base_three.o
 	$(OCL) -o $@ -c lal_sw.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_sw_ext.o: $(ALL_H) lal_sw.h lal_sw_ext.cpp lal_base_three.h
 	$(OCL) -o $@ -c lal_sw_ext.cpp -I$(OBJ_DIR)
 
 # beck: generate beck_cl.h from the kernel source with file_to_cstr.sh,
 # then compile lal_beck.o and lal_beck_ext.o.
 $(OBJ_DIR)/beck_cl.h: lal_beck.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh beck $(PRE1_H) lal_beck.cu $@
 
 $(OBJ_DIR)/lal_beck.o: $(ALL_H) lal_beck.h lal_beck.cpp $(OBJ_DIR)/beck_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c lal_beck.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_beck_ext.o: $(ALL_H) lal_beck.h lal_beck_ext.cpp lal_base_atomic.h
 	$(OCL) -o $@ -c lal_beck_ext.cpp -I$(OBJ_DIR)
 
 # mie: generate mie_cl.h from the kernel source with file_to_cstr.sh,
 # then compile lal_mie.o and lal_mie_ext.o.
 $(OBJ_DIR)/mie_cl.h: lal_mie.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh mie $(PRE1_H) lal_mie.cu $@
 
 $(OBJ_DIR)/lal_mie.o: $(ALL_H) lal_mie.h lal_mie.cpp $(OBJ_DIR)/mie_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c lal_mie.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_mie_ext.o: $(ALL_H) lal_mie.h lal_mie_ext.cpp lal_base_atomic.h
 	$(OCL) -o $@ -c lal_mie_ext.cpp -I$(OBJ_DIR)
 
 # soft: generate soft_cl.h from the kernel source with file_to_cstr.sh,
 # then compile lal_soft.o and lal_soft_ext.o.
 $(OBJ_DIR)/soft_cl.h: lal_soft.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh soft $(PRE1_H) lal_soft.cu $@
 
 $(OBJ_DIR)/lal_soft.o: $(ALL_H) lal_soft.h lal_soft.cpp $(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c lal_soft.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_soft_ext.o: $(ALL_H) lal_soft.h lal_soft_ext.cpp lal_base_atomic.h
 	$(OCL) -o $@ -c lal_soft_ext.cpp -I$(OBJ_DIR)
 
 # lj_coul_msm: generate lj_coul_msm_cl.h from the kernel source with
 # file_to_cstr.sh, then compile lal_lj_coul_msm.o and its _ext object.
 $(OBJ_DIR)/lj_coul_msm_cl.h: lal_lj_coul_msm.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh lj_coul_msm $(PRE1_H) lal_lj_coul_msm.cu $@
 
 $(OBJ_DIR)/lal_lj_coul_msm.o: $(ALL_H) lal_lj_coul_msm.h lal_lj_coul_msm.cpp $(OBJ_DIR)/lj_coul_msm_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c lal_lj_coul_msm.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_coul_msm_ext.o: $(ALL_H) lal_lj_coul_msm.h lal_lj_coul_msm_ext.cpp lal_base_charge.h
 	$(OCL) -o $@ -c lal_lj_coul_msm_ext.cpp -I$(OBJ_DIR)
 
 # lj_gromacs: generate lj_gromacs_cl.h from the kernel source with
 # file_to_cstr.sh, then compile lal_lj_gromacs.o and its _ext object.
 $(OBJ_DIR)/lj_gromacs_cl.h: lal_lj_gromacs.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh lj_gromacs $(PRE1_H) lal_lj_gromacs.cu $@
 
 $(OBJ_DIR)/lal_lj_gromacs.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs.cpp $(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c lal_lj_gromacs.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_gromacs_ext.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs_ext.cpp lal_base_atomic.h
 	$(OCL) -o $@ -c lal_lj_gromacs_ext.cpp -I$(OBJ_DIR)
 
 # dpd: generate dpd_cl.h from the kernel source with file_to_cstr.sh,
 # then compile lal_dpd.o and lal_dpd_ext.o.
 $(OBJ_DIR)/dpd_cl.h: lal_dpd.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh dpd $(PRE1_H) lal_dpd.cu $@
 
 $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cl.h $(OBJ_DIR)/lal_base_dpd.o
 	$(OCL) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
 	$(OCL) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
 
 # tersoff: generate tersoff_cl.h from lal_tersoff_extra.h plus the kernel
 # source with file_to_cstr.sh, then compile lal_tersoff.o and its _ext object.
 $(OBJ_DIR)/tersoff_cl.h: lal_tersoff.cu lal_tersoff_extra.h $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh tersoff $(PRE1_H) lal_tersoff_extra.h lal_tersoff.cu $@
 
 $(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/lal_base_three.o
 	$(OCL) -o $@ -c lal_tersoff.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
 	$(OCL) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
 
 # coul: generate coul_cl.h from the kernel source with file_to_cstr.sh,
 # then compile lal_coul.o and lal_coul_ext.o.
 $(OBJ_DIR)/coul_cl.h: lal_coul.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh coul $(PRE1_H) lal_coul.cu $@
 
 $(OBJ_DIR)/lal_coul.o: $(ALL_H) lal_coul.h lal_coul.cpp $(OBJ_DIR)/coul_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c lal_coul.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_coul_ext.o: $(ALL_H) lal_coul.h lal_coul_ext.cpp lal_base_charge.h
 	$(OCL) -o $@ -c lal_coul_ext.cpp -I$(OBJ_DIR)
 
 # coul_debye: generate coul_debye_cl.h from the kernel source with
 # file_to_cstr.sh, then compile lal_coul_debye.o and its _ext object.
 $(OBJ_DIR)/coul_debye_cl.h: lal_coul_debye.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh coul_debye $(PRE1_H) lal_coul_debye.cu $@
 
 $(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/lal_base_charge.o
 	$(OCL) -o $@ -c lal_coul_debye.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h
 	$(OCL) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR)
 
 # zbl: generate zbl_cl.h from the kernel source with file_to_cstr.sh,
 # then compile lal_zbl.o and lal_zbl_ext.o.
 $(OBJ_DIR)/zbl_cl.h: lal_zbl.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh zbl $(PRE1_H) lal_zbl.cu $@
 
 $(OBJ_DIR)/lal_zbl.o: $(ALL_H) lal_zbl.h lal_zbl.cpp $(OBJ_DIR)/zbl_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c lal_zbl.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_zbl_ext.o: $(ALL_H) lal_zbl.h lal_zbl_ext.cpp lal_base_atomic.h
 	$(OCL) -o $@ -c lal_zbl_ext.cpp -I$(OBJ_DIR)
 
 # lj_cubic: generate lj_cubic_cl.h from the kernel source with
 # file_to_cstr.sh, then compile lal_lj_cubic.o and its _ext object.
 $(OBJ_DIR)/lj_cubic_cl.h: lal_lj_cubic.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh lj_cubic $(PRE1_H) lal_lj_cubic.cu $@
 
 $(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/lal_base_atomic.o
 	$(OCL) -o $@ -c lal_lj_cubic.cpp -I$(OBJ_DIR)
 
 $(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h
 	$(OCL) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR)
 
 # Build ocl_get_devices from the Geryon source in a single compile-and-link
 # step, with UCL_OPENCL defined.
 $(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp
 	$(OCL) -o $@ $< -DUCL_OPENCL $(OCL_LINK)
 
 # Archive every object into the library, then copy $(EXTRAMAKE) to
 # Makefile.lammps (the copy itself is not echoed).
 $(OCL_LIB): $(OBJS) $(PTXS)
 	$(AR) -crusv $@ $(OBJS)
 	@cp $(EXTRAMAKE) Makefile.lammps
 
 # opencl, clean and veryclean name actions rather than files; declaring them
 # phony keeps a stray file with one of these names from masking the target.
 .PHONY: opencl clean veryclean
 
 # Build the OpenCL executables.
 opencl: $(OCL_EXECS)
 
 # Remove executables, the library, objects, generated kernels and link info.
 clean:
 	rm -rf $(EXECS) $(OCL_EXECS) $(OCL_LIB) $(OBJS) $(KERS) *.linkinfo
 
 # Everything clean removes, plus editor backup files.
 veryclean: clean
 	rm -rf *~ *.linkinfo
 
diff --git a/lib/gpu/lal_eam_alloy_ext.cpp b/lib/gpu/lal_eam_alloy_ext.cpp
new file mode 100644
index 000000000..282f93afe
--- /dev/null
+++ b/lib/gpu/lal_eam_alloy_ext.cpp
@@ -0,0 +1,143 @@
+/***************************************************************************
+                               eam_alloy_ext.cpp
+                             -------------------
+                   Trung Dac Nguyen, W. Michael Brown (ORNL)
+
+  Functions for LAMMPS access to eam/alloy acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov nguyentd@ornl.gov
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <math.h>
+
+#include "lal_eam.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+// File-scope EAM instance; every eam_alloy_gpu_* function in this file
+// operates on it.
+static EAM<PRECISION,ACC_PRECISION> EAMALMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+//
+// gpu_mode is set from the device before any check; fp_size is set to
+// sizeof(PRECISION) once the split check has passed.  max_nbors is forwarded
+// to EAM::init() rather than a hard-coded literal.
+// Returns -8 when the device particle split is not 1.0, otherwise the value
+// produced by EAM::init() on this process (0 if init() was not called here).
+// ---------------------------------------------------------------------------
+int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq,
+                 int **host_type2rhor, int **host_type2z2r, int *host_type2frho,
+                 double ***host_rhor_spline, double ***host_z2r_spline,
+                 double ***host_frho_spline,
+                 double rdr, double rdrho, double rhomax, int nrhor,
+                 int nrho, int nz2r, int nfrho, int nr,
+                 const int nlocal, const int nall, const int max_nbors,
+                 const int maxspecial, const double cell_size,
+                 int &gpu_mode, FILE *screen, int &fp_size) {
+  EAMALMF.clear();
+  gpu_mode=EAMALMF.device->gpu_mode();
+  double gpu_split=EAMALMF.device->particle_split();
+  int first_gpu=EAMALMF.device->first_device();
+  int last_gpu=EAMALMF.device->last_device();
+  int world_me=EAMALMF.device->world_me();
+  int gpu_rank=EAMALMF.device->gpu_rank();
+  int procs_per_gpu=EAMALMF.device->procs_per_gpu();
+
+  // disable host/device split for now
+  if (gpu_split != 1.0)
+    return -8;
+
+  fp_size=sizeof(PRECISION);
+
+  EAMALMF.device->init_message(screen,"eam/alloy",first_gpu,last_gpu);
+
+  // progress messages are printed only by replica 0 and only with a screen
+  bool message=false;
+  if (EAMALMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  // world rank 0 initializes first; all other ranks wait at the barrier
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=EAMALMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r,
+                       host_type2frho, host_rhor_spline, host_z2r_spline,
+                       host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, nz2r,
+                       nfrho, nr, nlocal, nall, max_nbors, maxspecial,
+                       cell_size, gpu_split, screen);
+
+  EAMALMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  // remaining ranks initialize one gpu_rank at a time, with a gpu_barrier()
+  // after each turn
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=EAMALMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r,
+                         host_type2frho, host_rhor_spline, host_z2r_spline,
+                         host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho,
+                         nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
+                         cell_size, gpu_split, screen);
+
+    EAMALMF.device->gpu_barrier();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    EAMALMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+// Forward to EAM::clear() on the file-scope eam/alloy instance.
+void eam_alloy_gpu_clear()
+{
+  EAMALMF.clear();
+}
+
+// Forward to the EAM::compute() overload that takes the sub-domain bounds,
+// tags and special lists, and hand its int** result back to the caller.
+int ** eam_alloy_gpu_compute_n(const int ago, const int inum_full,
+                         const int nall, double **host_x, int *host_type,
+                         double *sublo, double *subhi, tagint *tag,
+                         int **nspecial, tagint **special,
+                         const bool eflag, const bool vflag,
+                         const bool eatom, const bool vatom,
+                         int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, int &inum,
+                         void **fp_ptr)
+{
+  int **result = EAMALMF.compute(ago, inum_full, nall, host_x, host_type,
+                                 sublo, subhi, tag, nspecial, special,
+                                 eflag, vflag, eatom, vatom, host_start,
+                                 ilist, jnum, cpu_time, success, inum, fp_ptr);
+  return result;
+}
+
+// Forward to the EAM::compute() overload that takes a caller-supplied
+// neighbor list (ilist, numj, firstneigh).
+void eam_alloy_gpu_compute(const int ago, const int inum_full,
+                     const int nlocal, const int nall,
+                     double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh,
+                     const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom,
+                     int &host_start, const double cpu_time, bool &success,
+                     void **fp_ptr)
+{
+  EAMALMF.compute(ago, inum_full, nlocal, nall, host_x, host_type,
+                  ilist, numj, firstneigh, eflag, vflag, eatom, vatom,
+                  host_start, cpu_time, success, fp_ptr);
+}
+
+// Forward to EAM::compute2() on the file-scope eam/alloy instance.
+void eam_alloy_gpu_compute_force(int *ilist,
+                      const bool eflag, const bool vflag,
+                      const bool eatom, const bool vatom)
+{
+  EAMALMF.compute2(ilist, eflag, vflag, eatom, vatom);
+}
+
+
+// Report EAM::host_memory_usage() for the file-scope eam/alloy instance.
+double eam_alloy_gpu_bytes()
+{
+  return EAMALMF.host_memory_usage();
+}
+
+
diff --git a/lib/gpu/lal_eam_fs_ext.cpp b/lib/gpu/lal_eam_fs_ext.cpp
new file mode 100644
index 000000000..4992f3ab9
--- /dev/null
+++ b/lib/gpu/lal_eam_fs_ext.cpp
@@ -0,0 +1,143 @@
+/***************************************************************************
+                                 eam_fs_ext.cpp
+                             -------------------
+                   Trung Dac Nguyen, W. Michael Brown (ORNL)
+
+  Functions for LAMMPS access to eam/fs acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : brownw@ornl.gov nguyentd@ornl.gov
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <math.h>
+
+#include "lal_eam.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+// File-scope EAM instance; every eam_fs_gpu_* function in this file
+// operates on it.
+static EAM<PRECISION,ACC_PRECISION> EAMFSMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+//
+// gpu_mode is set from the device before any check; fp_size is set to
+// sizeof(PRECISION) once the split check has passed.  max_nbors is forwarded
+// to EAM::init() rather than a hard-coded literal.
+// Returns -8 when the device particle split is not 1.0, otherwise the value
+// produced by EAM::init() on this process (0 if init() was not called here).
+// ---------------------------------------------------------------------------
+int eam_fs_gpu_init(const int ntypes, double host_cutforcesq,
+                 int **host_type2rhor, int **host_type2z2r, int *host_type2frho,
+                 double ***host_rhor_spline, double ***host_z2r_spline,
+                 double ***host_frho_spline,
+                 double rdr, double rdrho, double rhomax, int nrhor,
+                 int nrho, int nz2r, int nfrho, int nr,
+                 const int nlocal, const int nall, const int max_nbors,
+                 const int maxspecial, const double cell_size,
+                 int &gpu_mode, FILE *screen, int &fp_size) {
+  EAMFSMF.clear();
+  gpu_mode=EAMFSMF.device->gpu_mode();
+  double gpu_split=EAMFSMF.device->particle_split();
+  int first_gpu=EAMFSMF.device->first_device();
+  int last_gpu=EAMFSMF.device->last_device();
+  int world_me=EAMFSMF.device->world_me();
+  int gpu_rank=EAMFSMF.device->gpu_rank();
+  int procs_per_gpu=EAMFSMF.device->procs_per_gpu();
+
+  // disable host/device split for now
+  if (gpu_split != 1.0)
+    return -8;
+
+  fp_size=sizeof(PRECISION);
+
+  EAMFSMF.device->init_message(screen,"eam/fs",first_gpu,last_gpu);
+
+  // progress messages are printed only by replica 0 and only with a screen
+  bool message=false;
+  if (EAMFSMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  // world rank 0 initializes first; all other ranks wait at the barrier
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=EAMFSMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r,
+                       host_type2frho, host_rhor_spline, host_z2r_spline,
+                       host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, nz2r,
+                       nfrho, nr, nlocal, nall, max_nbors, maxspecial,
+                       cell_size, gpu_split, screen);
+
+  EAMFSMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  // remaining ranks initialize one gpu_rank at a time, with a gpu_barrier()
+  // after each turn
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=EAMFSMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r,
+                         host_type2frho, host_rhor_spline, host_z2r_spline,
+                         host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho,
+                         nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
+                         cell_size, gpu_split, screen);
+
+    EAMFSMF.device->gpu_barrier();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    EAMFSMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+// Forward to EAM::clear() on the file-scope eam/fs instance.
+void eam_fs_gpu_clear()
+{
+  EAMFSMF.clear();
+}
+
+// Forward to the EAM::compute() overload that takes the sub-domain bounds,
+// tags and special lists, and hand its int** result back to the caller.
+int ** eam_fs_gpu_compute_n(const int ago, const int inum_full,
+                         const int nall, double **host_x, int *host_type,
+                         double *sublo, double *subhi, tagint *tag,
+                         int **nspecial, tagint **special,
+                         const bool eflag, const bool vflag,
+                         const bool eatom, const bool vatom,
+                         int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, int &inum,
+                         void **fp_ptr)
+{
+  int **result = EAMFSMF.compute(ago, inum_full, nall, host_x, host_type,
+                                 sublo, subhi, tag, nspecial, special,
+                                 eflag, vflag, eatom, vatom, host_start,
+                                 ilist, jnum, cpu_time, success, inum, fp_ptr);
+  return result;
+}
+
+// Forward to the EAM::compute() overload that takes a caller-supplied
+// neighbor list (ilist, numj, firstneigh).
+void eam_fs_gpu_compute(const int ago, const int inum_full,
+                     const int nlocal, const int nall,
+                     double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh,
+                     const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom,
+                     int &host_start, const double cpu_time, bool &success,
+                     void **fp_ptr)
+{
+  EAMFSMF.compute(ago, inum_full, nlocal, nall, host_x, host_type,
+                  ilist, numj, firstneigh, eflag, vflag, eatom, vatom,
+                  host_start, cpu_time, success, fp_ptr);
+}
+
+// Forward to EAM::compute2() on the file-scope eam/fs instance.
+void eam_fs_gpu_compute_force(int *ilist,
+                      const bool eflag, const bool vflag,
+                      const bool eatom, const bool vatom)
+{
+  EAMFSMF.compute2(ilist, eflag, vflag, eatom, vatom);
+}
+
+
+// Report EAM::host_memory_usage() for the file-scope eam/fs instance.
+double eam_fs_gpu_bytes()
+{
+  return EAMFSMF.host_memory_usage();
+}
+
+
diff --git a/src/GPU/pair_eam_alloy_gpu.cpp b/src/GPU/pair_eam_alloy_gpu.cpp
index 74b43f110..9fb914cb0 100644
--- a/src/GPU/pair_eam_alloy_gpu.cpp
+++ b/src/GPU/pair_eam_alloy_gpu.cpp
@@ -1,324 +1,563 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Trung Dac Nguyen (ORNL), W. Michael Brown (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_eam_alloy_gpu.h"
 #include "atom.h"
+#include "force.h"
 #include "comm.h"
+#include "neighbor.h"
+#include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
+#include "neigh_request.h"
+#include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 #define MAXLINE 1024
 
+// External functions from cuda library for atom decomposition
+// Only prototypes appear here; the definitions live outside this file, so
+// these signatures must stay in step with them.
+
+// Called from init_style(); gpu_mode and fp_size are output references.
+int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq,
+                 int **host_type2rhor, int **host_type2z2r,
+                 int *host_type2frho, double ***host_rhor_spline,
+                 double ***host_z2r_spline, double ***host_frho_spline,
+                 double rdr, double rdrho, double rhomax, 
+                 int nrhor, int nrho, int nz2r, int nfrho, int nr, 
+                 const int nlocal, const int nall, const int max_nbors, 
+                 const int maxspecial, const double cell_size, int &gpu_mode, 
+                 FILE *screen, int &fp_size);
+// Called from the destructor.
+void eam_alloy_gpu_clear();
+// Called from compute() when gpu_mode != GPU_FORCE; its return value is
+// stored as firstneigh there.
+int** eam_alloy_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, double *sublo,
+                        double *subhi, tagint *tag, int **nspecial, tagint **special,
+                        const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist,
+                        int **jnum,  const double cpu_time, bool &success,
+                        int &inum, void **fp_ptr);
+// Called from compute() when gpu_mode == GPU_FORCE, with the host neighbor list.
+void eam_alloy_gpu_compute(const int ago, const int inum_full, const int nlocal,
+                     const int nall,double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh,
+                     const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start,
+                     const double cpu_time, bool &success, void **fp_ptr);
+// Called from compute() after the forward communication.
+void eam_alloy_gpu_compute_force(int *ilist, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom);
+// Added to Pair::memory_usage() in memory_usage().
+double eam_alloy_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+// Construct on top of PairEAM with gpu_mode starting as GPU_FORCE.
+PairEAMAlloyGPU::PairEAMAlloyGPU(LAMMPS *lmp) : PairEAM(lmp), gpu_mode(GPU_FORCE)
+{
+  respa_enable = 0;
+  reinitflag = 0;
+  cpu_time = 0.0;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+
+  // coeff() only accepts I,J = * * and rebuilds every mapping in one call,
+  // so flag this style as taking a single pair_coeff command
+  one_coeff = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Destructor: hands cleanup to eam_alloy_gpu_clear().
+PairEAMAlloyGPU::~PairEAMAlloyGPU()
+{
+  eam_alloy_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Sum of Pair::memory_usage() and the value reported by eam_alloy_gpu_bytes().
+double PairEAMAlloyGPU::memory_usage()
+{
+  const double pair_bytes = Pair::memory_usage();
+  const double gpu_lib_bytes = eam_alloy_gpu_bytes();
+  return pair_bytes + gpu_lib_bytes;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Run one evaluation in two GPU library calls separated by a forward
+// communication of this pair style's per-atom data:
+//   1. eam_alloy_gpu_compute_n() or eam_alloy_gpu_compute(), chosen by gpu_mode
+//   2. comm->forward_comm_pair(this)
+//   3. eam_alloy_gpu_compute_force()
+// Aborts through error->one() if the first call clears `success`.
+// NOTE(review): domain is dereferenced below but domain.h is not among the
+// includes visible in this file - confirm it arrives through another header.
+void PairEAMAlloyGPU::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
+
+  // compute density on each atom on GPU
+
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+  int inum, host_start, inum_dev;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+  if (gpu_mode != GPU_FORCE) {
+    // no host neighbor list is passed in; ilist, numneigh and firstneigh
+    // are filled from what the library call hands back
+    inum = atom->nlocal;
+    firstneigh = eam_alloy_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+                                   atom->type, domain->sublo, domain->subhi,
+                                   atom->tag, atom->nspecial, atom->special,
+                                   eflag, vflag, eflag_atom, vflag_atom,
+                                   host_start, &ilist, &numneigh, cpu_time,
+                                   success, inum_dev, &fp_pinned);
+  } else { // gpu_mode == GPU_FORCE
+    // pass the neighbor list held in `list` to the library
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    eam_alloy_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type,
+                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
+                    vflag_atom, host_start, cpu_time, success, &fp_pinned);
+  }
+
+  if (!success)
+    error->one(FLERR,"Insufficient memory on accelerator");
+
+  // communicate derivative of embedding function
+
+  comm->forward_comm_pair(this);
+
+  // compute forces on each atom on GPU
+  // (ilist is passed only in GPU_FORCE mode; otherwise NULL is passed)
+  if (gpu_mode != GPU_FORCE)
+    eam_alloy_gpu_compute_force(NULL, eflag, vflag, eflag_atom, vflag_atom);
+  else
+    eam_alloy_gpu_compute_force(ilist, eflag, vflag, eflag_atom, vflag_atom);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style:
+   build the spline tables, recompute cutsq, initialize the GPU library
+   and, in GPU_FORCE mode, request a full neighbor list
+------------------------------------------------------------------------- */
+
+void PairEAMAlloyGPU::init_style()
+{
+  if (force->newton_pair)
+    error->all(FLERR,"Cannot use newton pair with eam/alloy/gpu pair style");
+
+  // turn the read-in file data into arrays, then into splines
+
+  file2array();
+  array2spline();
+
+  // cutsq is normally filled in after init_style() returns, so compute it
+  // here and keep track of the largest squared cutoff
+
+  double max_cutsq = -1.0;
+  for (int itype = 1; itype <= atom->ntypes; itype++) {
+    for (int jtype = itype; jtype <= atom->ntypes; jtype++) {
+      const bool pair_is_set =
+        setflag[itype][jtype] != 0 ||
+        (setflag[itype][itype] != 0 && setflag[jtype][jtype] != 0);
+
+      double cut_ij_sq = 0.0;
+      if (pair_is_set) {
+        const double cut_ij = init_one(itype,jtype);
+        cut_ij_sq = cut_ij*cut_ij;
+        if (cut_ij_sq > max_cutsq) max_cutsq = cut_ij_sq;
+      }
+      cutsq[itype][jtype] = cutsq[jtype][itype] = cut_ij_sq;
+    }
+  }
+  double cell_size = sqrt(max_cutsq) + neighbor->skin;
+
+  // maxspecial stays 0 unless atom->molecular is set
+  int maxspecial = atom->molecular ? atom->maxspecial : 0;
+
+  int fp_size;
+  int init_flag = eam_alloy_gpu_init(atom->ntypes+1, cutforcesq, type2rhor,
+                               type2z2r, type2frho, rhor_spline, z2r_spline,
+                               frho_spline, rdr, rdrho, rhomax, nrhor, nrho,
+                               nz2r, nfrho, nr, atom->nlocal,
+                               atom->nlocal+atom->nghost, 300, maxspecial,
+                               cell_size, gpu_mode, screen, fp_size);
+  GPU_EXTRA::check_flag(init_flag,error,world);
+
+  // a host neighbor list is requested only in GPU_FORCE mode
+  if (gpu_mode == GPU_FORCE) {
+    int irequest = neighbor->request(this,instance_me);
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full = 1;
+  }
+
+  // fp_pinned is read as float unless the library reports double-sized values
+  fp_single = (fp_size != sizeof(double));
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Evaluate the pair term for atoms i and j (types itype, jtype) at squared
+// distance rsq from the host-side spline tables and the per-atom values in
+// fp_pinned.  Returns phi and stores -psip/r in fforce.
+// factor_coul and factor_lj are not used.
+double PairEAMAlloyGPU::single(int i, int j, int itype, int jtype,
+                       double rsq, double factor_coul, double factor_lj,
+                       double &fforce)
+{
+  int m;
+  double r,p,rhoip,rhojp,z2,z2p,recip,phi,phip,psip;
+  double *coeff;
+
+  // m = spline table index (capped at nr-1), p = fractional offset (capped at 1)
+  r = sqrt(rsq);
+  p = r*rdr + 1.0;
+  m = static_cast<int> (p);
+  m = MIN(m,nr-1);
+  p -= m;
+  p = MIN(p,1.0);
+
+  // coeff[0..2] give the quadratic evaluated for rhoip, rhojp and z2p;
+  // coeff[3..6] give the cubic evaluated for z2
+  coeff = rhor_spline[type2rhor[itype][jtype]][m];
+  rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
+  coeff = rhor_spline[type2rhor[jtype][itype]][m];
+  rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
+  coeff = z2r_spline[type2z2r[itype][jtype]][m];
+  z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
+  z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+
+  // fp_pinned holds doubles or floats depending on fp_single
+  double fp_i,fp_j;
+  if (fp_single == false) {
+    fp_i = ((double*)fp_pinned)[i];
+    fp_j = ((double*)fp_pinned)[j];
+  } else {
+    fp_i = ((float*)fp_pinned)[i];
+    fp_j = ((float*)fp_pinned)[j];
+  }
+
+  recip = 1.0/r;
+  phi = z2*recip;
+  phip = z2p*recip - phi*recip;
+  psip = fp_i*rhojp + fp_j*rhoip + phip;
+  fforce = -psip*recip;
+
+  return phi;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Pack fp_pinned[list[0..n-1]] into buf as doubles (converting from float
+// when fp_single is set) and return the number of values written.
+// pbc_flag and pbc are not used.
+int PairEAMAlloyGPU::pack_forward_comm(int n, int *list, double *buf,
+                                  int pbc_flag,int *pbc)
+{
+  int nbuf = 0;
+
+  if (!fp_single) {
+    const double *fp_src = (double *)fp_pinned;
+    for (int k = 0; k < n; k++)
+      buf[nbuf++] = fp_src[list[k]];
+  } else {
+    const float *fp_src = (float *)fp_pinned;
+    for (int k = 0; k < n; k++)
+      buf[nbuf++] = static_cast<double>(fp_src[list[k]]);
+  }
+
+  return nbuf;
+}
+
 /* ---------------------------------------------------------------------- */
 
-PairEAMAlloyGPU::PairEAMAlloyGPU(LAMMPS *lmp) : PairEAMGPU(lmp)
+// Copy n values from buf into fp_pinned[first .. first+n-1], stored as
+// float when fp_single is set and as double otherwise.
+void PairEAMAlloyGPU::unpack_forward_comm(int n, int first, double *buf)
 {
-  one_coeff = 1;
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  if (fp_single) {
+    float *fp_ptr = (float *)fp_pinned;
+    // implicit double -> float conversion on assignment
+    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
+  } else {
+    double *fp_ptr = (double *)fp_pinned;
+    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
+  }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
    read DYNAMO setfl file
 ------------------------------------------------------------------------- */
 
 void PairEAMAlloyGPU::coeff(int narg, char **arg)
 {
   int i,j;
 
   if (!allocated) allocate();
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read EAM setfl file
 
   if (setfl) {
     for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
     delete [] setfl->elements;
     delete [] setfl->mass;
     memory->destroy(setfl->frho);
     memory->destroy(setfl->rhor);
     memory->destroy(setfl->z2r);
     delete setfl;
   }
   setfl = new Setfl();
   read_file(arg[2]);
 
   // read args that map atom types to elements in potential file
   // map[i] = which element the Ith atom type is, -1 if NULL
 
   for (i = 3; i < narg; i++) {
     if (strcmp(arg[i],"NULL") == 0) {
       map[i-2] = -1;
       continue;
     }
     for (j = 0; j < setfl->nelements; j++)
       if (strcmp(arg[i],setfl->elements[j]) == 0) break;
     if (j < setfl->nelements) map[i-2] = j;
     else error->all(FLERR,"No matching element in EAM potential file");
   }
 
   // clear setflag since coeff() called once with I,J = * *
 
   int n = atom->ntypes;
   for (i = 1; i <= n; i++)
     for (j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
   // set mass of atom type if i = j
 
   int count = 0;
   for (i = 1; i <= n; i++) {
     for (j = i; j <= n; j++) {
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         if (i == j) atom->set_mass(i,setfl->mass[map[i]]);
         count++;
       }
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    read a multi-element DYNAMO setfl file
 ------------------------------------------------------------------------- */
 
 void PairEAMAlloyGPU::read_file(char *filename)
 {
   Setfl *file = setfl;
 
   // open potential file
 
   int me = comm->me;
   FILE *fptr;
   char line[MAXLINE];
 
   if (me == 0) {
     fptr = fopen(filename,"r");
     if (fptr == NULL) {
       char str[128];
       sprintf(str,"Cannot open EAM potential file %s",filename);
       error->one(FLERR,str);
     }
   }
 
   // read and broadcast header
   // extract element names from nelements line
 
   int n;
   if (me == 0) {
     fgets(line,MAXLINE,fptr);
     fgets(line,MAXLINE,fptr);
     fgets(line,MAXLINE,fptr);
     fgets(line,MAXLINE,fptr);
     n = strlen(line) + 1;
   }
   MPI_Bcast(&n,1,MPI_INT,0,world);
   MPI_Bcast(line,n,MPI_CHAR,0,world);
 
   sscanf(line,"%d",&file->nelements);
   int nwords = atom->count_words(line);
   if (nwords != file->nelements + 1)
     error->all(FLERR,"Incorrect element names in EAM potential file");
 
   char **words = new char*[file->nelements+1];
   nwords = 0;
   strtok(line," \t\n\r\f");
   while ( (words[nwords++] = strtok(NULL," \t\n\r\f")) ) continue;
 
   file->elements = new char*[file->nelements];
   for (int i = 0; i < file->nelements; i++) {
     n = strlen(words[i]) + 1;
     file->elements[i] = new char[n];
     strcpy(file->elements[i],words[i]);
   }
   delete [] words;
 
   if (me == 0) {
     fgets(line,MAXLINE,fptr);
     sscanf(line,"%d %lg %d %lg %lg",
            &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
   }
 
   MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
   MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&file->nr,1,MPI_INT,0,world);
   MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
 
   file->mass = new double[file->nelements];
   memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho");
   memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor");
   memory->create(file->z2r,file->nelements,file->nelements,file->nr+1,
                  "pair:z2r");
 
   int i,j,tmp;
   for (i = 0; i < file->nelements; i++) {
     if (me == 0) {
       fgets(line,MAXLINE,fptr);
       sscanf(line,"%d %lg",&tmp,&file->mass[i]);
     }
     MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
 
     if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
     MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
     if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]);
     MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world);
   }
 
   for (i = 0; i < file->nelements; i++)
     for (j = 0; j <= i; j++) {
       if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
       MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
     }
 
   // close the potential file
 
   if (me == 0) fclose(fptr);
 }
 
 /* ----------------------------------------------------------------------
    copy read-in setfl potential to standard array format
+   destroys and re-creates frho, rhor and z2r, then fills them from setfl
+   also fills type2frho, type2rhor and type2z2r from map[]
+   table values are copied for indices 1..nrho and 1..nr only
+   frho gets one extra all-zero row (index nfrho-1) for unmapped types
 ------------------------------------------------------------------------- */
 
 void PairEAMAlloyGPU::file2array()
 {
   int i,j,m,n;
   int ntypes = atom->ntypes;
 
   // set function params directly from setfl file
 
   nrho = setfl->nrho;
   nr = setfl->nr;
   drho = setfl->drho;
   dr = setfl->dr;
   rhomax = (nrho-1) * drho;
 
   // ------------------------------------------------------------------
   // setup frho arrays
   // ------------------------------------------------------------------
 
   // allocate frho arrays
   // nfrho = # of setfl elements + 1 for zero array
 
   nfrho = setfl->nelements + 1;
   memory->destroy(frho);
   memory->create(frho,nfrho,nrho+1,"pair:frho");
 
   // copy each element's frho to global frho
 
   for (i = 0; i < setfl->nelements; i++)
     for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m];
 
   // add extra frho of zeroes for non-EAM types to point to (pair hybrid)
   // this is necessary b/c fp is still computed for non-EAM atoms
 
   for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
 
   // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
   // if atom type doesn't point to element (non-EAM atom in pair hybrid)
   // then map it to last frho array of zeroes
 
   for (i = 1; i <= ntypes; i++)
     if (map[i] >= 0) type2frho[i] = map[i];
     else type2frho[i] = nfrho-1;
 
   // ------------------------------------------------------------------
   // setup rhor arrays
   // ------------------------------------------------------------------
 
   // allocate rhor arrays
   // nrhor = # of setfl elements
 
   nrhor = setfl->nelements;
   memory->destroy(rhor);
   memory->create(rhor,nrhor,nr+1,"pair:rhor");
 
   // copy each element's rhor to global rhor
 
   for (i = 0; i < setfl->nelements; i++)
     for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m];
 
   // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
   // for setfl files, I,J mapping only depends on I
   // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
 
   for (i = 1; i <= ntypes; i++)
     for (j = 1; j <= ntypes; j++)
       type2rhor[i][j] = map[i];
 
   // ------------------------------------------------------------------
   // setup z2r arrays
   // ------------------------------------------------------------------
 
   // allocate z2r arrays
   // nz2r = N*(N+1)/2 where N = # of setfl elements
 
   nz2r = setfl->nelements * (setfl->nelements+1) / 2;
   memory->destroy(z2r);
   memory->create(z2r,nz2r,nr+1,"pair:z2r");
 
   // copy each element pair z2r to global z2r, only for I >= J
 
   n = 0;
   for (i = 0; i < setfl->nelements; i++)
     for (j = 0; j <= i; j++) {
       for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m];
       n++;
     }
 
   // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
   // set of z2r arrays only fill lower triangular Nelement matrix
   // value = n = sum over rows of lower-triangular matrix until reach irow,icol
   // swap indices when irow < icol to stay lower triangular
   // if map = -1 (non-EAM atom in pair hybrid):
   //   type2z2r is not used by non-opt
   //   but set type2z2r to 0 since accessed by opt
 
   int irow,icol;
   for (i = 1; i <= ntypes; i++) {
     for (j = 1; j <= ntypes; j++) {
       irow = map[i];
       icol = map[j];
       if (irow == -1 || icol == -1) {
         type2z2r[i][j] = 0;
         continue;
       }
       if (irow < icol) {
         irow = map[j];
         icol = map[i];
       }
       n = 0;
       for (m = 0; m < irow; m++) n += m + 1;
       n += icol;
       type2z2r[i][j] = n;
     }
   }
 }
diff --git a/src/GPU/pair_eam_alloy_gpu.h b/src/GPU/pair_eam_alloy_gpu.h
index 9513cab83..e19fd0c26 100644
--- a/src/GPU/pair_eam_alloy_gpu.h
+++ b/src/GPU/pair_eam_alloy_gpu.h
@@ -1,63 +1,77 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(eam/alloy/gpu,PairEAMAlloyGPU)
 
 #else
 
 #ifndef LMP_PAIR_EAM_ALLOY_GPU_H
 #define LMP_PAIR_EAM_ALLOY_GPU_H
 
-#include "pair_eam_gpu.h"
+#include "pair_eam.h"
 
 namespace LAMMPS_NS {
 
-class PairEAMAlloyGPU : public PairEAMGPU {
+class PairEAMAlloyGPU : public PairEAM {  // base class is now PairEAM rather than PairEAMGPU
 public:
   PairEAMAlloyGPU(class LAMMPS *);
-  virtual ~PairEAMAlloyGPU() {}
+  virtual ~PairEAMAlloyGPU();  // no longer defined inline in this header
   void coeff(int, char **);
+  void compute(int, int);  // declared in this class now that PairEAMGPU is no longer the base
+  void init_style();
+  double single(int, int, int, int, double, double, double, double &);
+  double memory_usage();
+
+  int pack_forward_comm(int, int *, double *, int, int *);
+  void unpack_forward_comm(int, int, double *);
+
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };  // presumably the values gpu_mode can take -- confirm in the .cpp file
 
  protected:
   void read_file(char *);
   void file2array();
+
+  int gpu_mode;      // NOTE(review): expected to hold one of the enum values above -- confirm
+  double cpu_time;   // NOTE(review): usage is not visible from this header
+  void *fp_pinned;   // untyped pointer; presumably interpreted according to fp_single -- confirm
+  bool fp_single;    // NOTE(review): looks like a float-vs-double selector for fp_pinned -- confirm
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: No matching element in EAM potential file
 
 The EAM potential file does not contain elements that match the
 requested elements.
 
 E: Cannot open EAM potential file %s
 
 The specified EAM potential file cannot be opened.  Check that the
 path and name are correct.
 
 E: Incorrect element names in EAM potential file
 
 The element names in the EAM file do not match those requested.
 
 */
diff --git a/src/GPU/pair_eam_fs_gpu.cpp b/src/GPU/pair_eam_fs_gpu.cpp
index a6e80547e..3c5c486a6 100644
--- a/src/GPU/pair_eam_fs_gpu.cpp
+++ b/src/GPU/pair_eam_fs_gpu.cpp
@@ -1,333 +1,572 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Trung Dac Nguyen (ORNL), W. Michael Brown (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_eam_fs_gpu.h"
 #include "atom.h"
+#include "force.h"
 #include "comm.h"
+#include "neighbor.h"
+#include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
+#include "neigh_request.h"
+#include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 #define MAXLINE 1024
 
+/* External functions from cuda library for atom decomposition
+   (only declared in this file; used as follows by PairEAMFSGPU):
+   eam_fs_gpu_init          - called from init_style(); its return value is
+                              passed to GPU_EXTRA::check_flag(); gpu_mode and
+                              fp_size are written through references
+   eam_fs_gpu_clear         - called from the destructor
+   eam_fs_gpu_compute_n     - called from compute() when gpu_mode != GPU_FORCE;
+                              returns the firstneigh array and fills ilist,
+                              jnum and fp_ptr through pointers
+   eam_fs_gpu_compute       - called from compute() when gpu_mode == GPU_FORCE
+                              with the host neighbor list
+   eam_fs_gpu_compute_force - called from compute() after the forward
+                              communication
+   eam_fs_gpu_bytes         - added to Pair::memory_usage() in memory_usage() */
+
+int eam_fs_gpu_init(const int ntypes, double host_cutforcesq,
+                 int **host_type2rhor, int **host_type2z2r,
+                 int *host_type2frho, double ***host_rhor_spline,
+                 double ***host_z2r_spline, double ***host_frho_spline,
+                 double rdr, double rdrho, double rhomax, 
+                 int nrhor, int nrho, int nz2r, int nfrho, int nr, 
+                 const int nlocal, const int nall, const int max_nbors, 
+                 const int maxspecial, const double cell_size, int &gpu_mode, 
+                 FILE *screen, int &fp_size);
+void eam_fs_gpu_clear();
+int** eam_fs_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, double *sublo,
+                        double *subhi, tagint *tag, int **nspecial, tagint **special,
+                        const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist,
+                        int **jnum,  const double cpu_time, bool &success,
+                        int &inum, void **fp_ptr);
+void eam_fs_gpu_compute(const int ago, const int inum_full, const int nlocal,
+                     const int nall,double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh,
+                     const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start,
+                     const double cpu_time, bool &success, void **fp_ptr);
+void eam_fs_gpu_compute_force(int *ilist, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom);
+double eam_fs_gpu_bytes();
+
+/* ----------------------------------------------------------------------
+   constructor
+   one_coeff = 1 is kept from the previous constructor: coeff() only
+   accepts a single call with I,J = * *
+   fp_pinned and fp_single get defined values here because single() and
+   the forward-comm callbacks read them; the GPU library overwrites
+   fp_pinned in compute() and init_style() sets fp_single
+------------------------------------------------------------------------- */
+
+PairEAMFSGPU::PairEAMFSGPU(LAMMPS *lmp) : PairEAM(lmp), gpu_mode(GPU_FORCE)
+{
+  respa_enable = 0;
+  reinitflag = 0;
+  cpu_time = 0.0;
+  one_coeff = 1;
+  fp_pinned = NULL;
+  fp_single = false;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   destructor: calls eam_fs_gpu_clear() from the GPU library
+------------------------------------------------------------------------- */
+
+PairEAMFSGPU::~PairEAMFSGPU()
+{
+  eam_fs_gpu_clear();
+}
+
+/* ----------------------------------------------------------------------
+   return Pair::memory_usage() plus the value of eam_fs_gpu_bytes()
+------------------------------------------------------------------------- */
+
+double PairEAMFSGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + eam_fs_gpu_bytes();
+}
+
+/* ----------------------------------------------------------------------
+   force computation in two GPU library calls, with a forward
+   communication (pack/unpack of fp_pinned) in between
+   the first call depends on gpu_mode: eam_fs_gpu_compute() with the host
+   neighbor list for GPU_FORCE, eam_fs_gpu_compute_n() otherwise
+   raises an error if the first call sets success to false
+------------------------------------------------------------------------- */
+
+void PairEAMFSGPU::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
+
+  // compute density on each atom on GPU
+
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+  int inum, host_start, inum_dev;  // host_start and inum_dev are only written by the library calls, not read here
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+  if (gpu_mode != GPU_FORCE) {  // no host neighbor list is read in this branch
+    inum = atom->nlocal;
+    firstneigh = eam_fs_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
+                                   atom->type, domain->sublo, domain->subhi,
+                                   atom->tag, atom->nspecial, atom->special,
+                                   eflag, vflag, eflag_atom, vflag_atom,
+                                   host_start, &ilist, &numneigh, cpu_time,
+                                   success, inum_dev, &fp_pinned);  // ilist, numneigh and fp_pinned are filled through pointers
+  } else { // gpu_mode == GPU_FORCE
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    eam_fs_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type,
+                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
+                    vflag_atom, host_start, cpu_time, success, &fp_pinned);
+  }
+
+  if (!success)
+    error->one(FLERR,"Insufficient memory on accelerator");
+
+  // communicate derivative of embedding function
+
+  comm->forward_comm_pair(this);
+
+  // compute forces on each atom on GPU
+  if (gpu_mode != GPU_FORCE)
+    eam_fs_gpu_compute_force(NULL, eflag, vflag, eflag_atom, vflag_atom);  // no ilist is passed in this mode
+  else
+    eam_fs_gpu_compute_force(ilist, eflag, vflag, eflag_atom, vflag_atom);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+   errors out if force->newton_pair is set
+   calls file2array() and array2spline(), recomputes cutsq for all type
+   pairs, initializes the GPU library via eam_fs_gpu_init() and sets
+   fp_single from the fp_size the library reports
+   requests a full neighbor list only when gpu_mode == GPU_FORCE
+------------------------------------------------------------------------- */
+
+void PairEAMFSGPU::init_style()
+{
+  if (force->newton_pair)
+    error->all(FLERR,"Cannot use newton pair with eam/fs/gpu pair style");
+
+  // convert read-in file(s) to arrays and spline them
+
+  file2array();
+  array2spline();
+
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;  // largest squared cutoff found below; stays -1.0 if no pair is set
+  double cut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        cut = init_one(i,j);
+        cut *= cut;  // from here on cut is the squared cutoff
+        if (cut > maxcut)
+          maxcut = cut;
+        cutsq[i][j] = cutsq[j][i] = cut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;  // maxcut is squared, hence the sqrt
+
+  int maxspecial=0;
+  if (atom->molecular)
+    maxspecial=atom->maxspecial;
+  int fp_size;  // written by eam_fs_gpu_init through a reference
+  int success = eam_fs_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r,
+                             type2frho, rhor_spline, z2r_spline, frho_spline,
+                             rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr,
+                             atom->nlocal, atom->nlocal+atom->nghost, 300,  // 300 is the max_nbors argument
+                             maxspecial, cell_size, gpu_mode, screen, fp_size);
+  GPU_EXTRA::check_flag(success,error,world);
+
+  if (gpu_mode == GPU_FORCE) {  // gpu_mode may have been changed by eam_fs_gpu_init above
+    int irequest = neighbor->request(this,instance_me);
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full = 1;
+  }
+
+  if (fp_size == sizeof(double))
+    fp_single = false;  // fp_pinned is read as double* elsewhere in this class
+  else
+    fp_single = true;   // fp_pinned is read as float* elsewhere in this class
+}
+
+/* ----------------------------------------------------------------------
+   pairwise energy and force factor for atoms i,j at squared distance rsq
+   evaluated from rhor_spline and z2r_spline at the given distance
+   the embedding derivatives for i and j are read from fp_pinned
+   returns phi and sets fforce = -psip/r
+   factor_coul and factor_lj are not used
+------------------------------------------------------------------------- */
+
+double PairEAMFSGPU::single(int i, int j, int itype, int jtype,
+                       double rsq, double factor_coul, double factor_lj,
+                       double &fforce)
+{
+  int m;
+  double r,p,rhoip,rhojp,z2,z2p,recip,phi,phip,psip;
+  double *coeff;
+
+  r = sqrt(rsq);
+  p = r*rdr + 1.0;
+  m = static_cast<int> (p);
+  m = MIN(m,nr-1);  // clamp the table index to at most nr-1
+  p -= m;           // p becomes the offset from table point m
+  p = MIN(p,1.0);   // and is capped at 1.0
+
+  coeff = rhor_spline[type2rhor[itype][jtype]][m];
+  rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
+  coeff = rhor_spline[type2rhor[jtype][itype]][m];
+  rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
+  coeff = z2r_spline[type2z2r[itype][jtype]][m];
+  z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
+  z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+
+  double fp_i,fp_j;
+  if (fp_single == false) {  // fp_pinned is untyped; fp_single selects the element type
+    fp_i = ((double*)fp_pinned)[i];
+    fp_j = ((double*)fp_pinned)[j];
+  } else {
+    fp_i = ((float*)fp_pinned)[i];
+    fp_j = ((float*)fp_pinned)[j];
+  }
+
+  recip = 1.0/r;
+  phi = z2*recip;
+  phip = z2p*recip - phi*recip;
+  psip = fp_i*rhojp + fp_j*rhoip + phip;
+  fforce = -psip*recip;
+
+  return phi;
+}
+
+/* ----------------------------------------------------------------------
+   copy fp_pinned[list[i]] for i = 0..n-1 into buf as doubles
+   fp_pinned is read as float* if fp_single is set, else as double*
+   returns the number of values written to buf
+   pbc_flag and pbc are not used
+------------------------------------------------------------------------- */
+
+int PairEAMFSGPU::pack_forward_comm(int n, int *list, double *buf, 
+                                  int pbc_flag,int *pbc)
+{
+  int i,j,m;
+
+  m = 0;
+
+  if (fp_single) {
+    float *fp_ptr = (float *)fp_pinned;
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = static_cast<double>(fp_ptr[j]);  // widen float to double for the buffer
+    }
+  } else {
+    double *fp_ptr = (double *)fp_pinned;
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = fp_ptr[j];
+    }
+  }
+
+  return m;
+}
+
 /* ---------------------------------------------------------------------- */
 
-PairEAMFSGPU::PairEAMFSGPU(LAMMPS *lmp) : PairEAMGPU(lmp)
+void PairEAMFSGPU::unpack_forward_comm(int n, int first, double *buf)  // copies n values from buf into fp_pinned[first..first+n-1]
 {
-  one_coeff = 1;
+  int i,m,last;
+
+  m = 0;
+  last = first + n;  // one past the last index written
+  if (fp_single) {  // fp_pinned holds floats: each double from buf is narrowed on assignment
+    float *fp_ptr = (float *)fp_pinned;
+    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
+  } else {  // fp_pinned holds doubles
+    double *fp_ptr = (double *)fp_pinned;
+    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
+  }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
    read EAM Finnis-Sinclair file
+   requires narg == 3 + ntypes: arg[0] and arg[1] must both be "*",
+   arg[2] is the potential file, arg[3..] give one element name or NULL
+   per atom type (stored in map[1..ntypes], -1 for NULL)
+   any previously read fs data is freed before the file is read again
+   errors out if an element name is not in the file or no pair gets set
 ------------------------------------------------------------------------- */
 
 void PairEAMFSGPU::coeff(int narg, char **arg)
 {
   int i,j;
 
   if (!allocated) allocate();
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read EAM Finnis-Sinclair file
 
   if (fs) {
     for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i];
     delete [] fs->elements;
     delete [] fs->mass;
     memory->destroy(fs->frho);
     memory->destroy(fs->rhor);
     memory->destroy(fs->z2r);
     delete fs;
   }
   fs = new Fs();
   read_file(arg[2]);
 
   // read args that map atom types to elements in potential file
   // map[i] = which element the Ith atom type is, -1 if NULL
 
   for (i = 3; i < narg; i++) {
     if (strcmp(arg[i],"NULL") == 0) {
       map[i-2] = -1;
       continue;
     }
     for (j = 0; j < fs->nelements; j++)
       if (strcmp(arg[i],fs->elements[j]) == 0) break;
     if (j < fs->nelements) map[i-2] = j;
     else error->all(FLERR,"No matching element in EAM potential file");
   }
 
   // clear setflag since coeff() called once with I,J = * *
 
   int n = atom->ntypes;
   for (i = 1; i <= n; i++)
     for (j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
   // set mass of atom type if i = j
 
   int count = 0;
   for (i = 1; i <= n; i++) {
     for (j = i; j <= n; j++) {
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         if (i == j) atom->set_mass(i,fs->mass[map[i]]);
         count++;
       }
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    read a multi-element DYNAMO setfl file
 ------------------------------------------------------------------------- */
 
 void PairEAMFSGPU::read_file(char *filename)
 {
   Fs *file = fs;
 
   // open potential file
 
   int me = comm->me;
   FILE *fptr;
   char line[MAXLINE];
 
   if (me == 0) {
-    fptr = fopen(filename,"r");
+    fptr = force->open_potential(filename);
     if (fptr == NULL) {
       char str[128];
       sprintf(str,"Cannot open EAM potential file %s",filename);
       error->one(FLERR,str);
     }
   }
 
   // read and broadcast header
   // extract element names from nelements line
 
   int n;
   if (me == 0) {
     fgets(line,MAXLINE,fptr);
     fgets(line,MAXLINE,fptr);
     fgets(line,MAXLINE,fptr);
     fgets(line,MAXLINE,fptr);
     n = strlen(line) + 1;
   }
   MPI_Bcast(&n,1,MPI_INT,0,world);
   MPI_Bcast(line,n,MPI_CHAR,0,world);
 
   sscanf(line,"%d",&file->nelements);
   int nwords = atom->count_words(line);
   if (nwords != file->nelements + 1)
     error->all(FLERR,"Incorrect element names in EAM potential file");
 
   char **words = new char*[file->nelements+1];
   nwords = 0;
   strtok(line," \t\n\r\f");
-  while ( (words[nwords++] = strtok(NULL," \t\n\r\f")) ) continue;
+  while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
 
   file->elements = new char*[file->nelements];
   for (int i = 0; i < file->nelements; i++) {
     n = strlen(words[i]) + 1;
     file->elements[i] = new char[n];
     strcpy(file->elements[i],words[i]);
   }
   delete [] words;
 
   if (me == 0) {
     fgets(line,MAXLINE,fptr);
     sscanf(line,"%d %lg %d %lg %lg",
            &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
   }
 
   MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
   MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&file->nr,1,MPI_INT,0,world);
   MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
 
   file->mass = new double[file->nelements];
   memory->create(file->frho,file->nelements,file->nrho+1,
                                               "pair:frho");
   memory->create(file->rhor,file->nelements,file->nelements,
                  file->nr+1,"pair:rhor");
   memory->create(file->z2r,file->nelements,file->nelements,
                  file->nr+1,"pair:z2r");
 
   int i,j,tmp;
   for (i = 0; i < file->nelements; i++) {
     if (me == 0) {
       fgets(line,MAXLINE,fptr);
       sscanf(line,"%d %lg",&tmp,&file->mass[i]);
     }
     MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
 
     if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
     MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
 
     for (j = 0; j < file->nelements; j++) {
       if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]);
       MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world);
     }
   }
 
   for (i = 0; i < file->nelements; i++)
     for (j = 0; j <= i; j++) {
       if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
       MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
     }
 
   // close the potential file
 
   if (me == 0) fclose(fptr);
 }
 
 /* ----------------------------------------------------------------------
    copy read-in setfl potential to standard array format
+   destroys and re-creates frho, rhor and z2r, then fills them from fs
+   also fills type2frho, type2rhor and type2z2r from map[]
+   rhor holds nelements*nelements tables, stored row by row (i, then j)
+   z2r holds the lower-triangular pairs only (j <= i)
+   table values are copied for indices 1..nrho and 1..nr only
 ------------------------------------------------------------------------- */
 
 void PairEAMFSGPU::file2array()
 {
   int i,j,m,n;
   int ntypes = atom->ntypes;
 
   // set function params directly from fs file
 
   nrho = fs->nrho;
   nr = fs->nr;
   drho = fs->drho;
   dr = fs->dr;
   rhomax = (nrho-1) * drho;
 
   // ------------------------------------------------------------------
   // setup frho arrays
   // ------------------------------------------------------------------
 
   // allocate frho arrays
   // nfrho = # of fs elements + 1 for zero array
 
   nfrho = fs->nelements + 1;
   memory->destroy(frho);
   memory->create(frho,nfrho,nrho+1,"pair:frho");
 
   // copy each element's frho to global frho
 
   for (i = 0; i < fs->nelements; i++)
     for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m];
 
   // add extra frho of zeroes for non-EAM types to point to (pair hybrid)
   // this is necessary b/c fp is still computed for non-EAM atoms
 
   for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
 
   // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
   // if atom type doesn't point to element (non-EAM atom in pair hybrid)
   // then map it to last frho array of zeroes
 
   for (i = 1; i <= ntypes; i++)
     if (map[i] >= 0) type2frho[i] = map[i];
     else type2frho[i] = nfrho-1;
 
   // ------------------------------------------------------------------
   // setup rhor arrays
   // ------------------------------------------------------------------
 
   // allocate rhor arrays
   // nrhor = square of # of fs elements
 
   nrhor = fs->nelements * fs->nelements;
   memory->destroy(rhor);
   memory->create(rhor,nrhor,nr+1,"pair:rhor");
 
   // copy each element pair rhor to global rhor
 
   n = 0;
   for (i = 0; i < fs->nelements; i++)
     for (j = 0; j < fs->nelements; j++) {
       for (m = 1; m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m];
       n++;
     }
 
   // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
   // for fs files, there is a full NxN set of rhor arrays
   // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
 
   for (i = 1; i <= ntypes; i++)
     for (j = 1; j <= ntypes; j++)
       type2rhor[i][j] = map[i] * fs->nelements + map[j];
 
   // ------------------------------------------------------------------
   // setup z2r arrays
   // ------------------------------------------------------------------
 
   // allocate z2r arrays
   // nz2r = N*(N+1)/2 where N = # of fs elements
 
   nz2r = fs->nelements * (fs->nelements+1) / 2;
   memory->destroy(z2r);
   memory->create(z2r,nz2r,nr+1,"pair:z2r");
 
   // copy each element pair z2r to global z2r, only for I >= J
 
   n = 0;
   for (i = 0; i < fs->nelements; i++)
     for (j = 0; j <= i; j++) {
       for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m];
       n++;
     }
 
   // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
   // set of z2r arrays only fill lower triangular Nelement matrix
   // value = n = sum over rows of lower-triangular matrix until reach irow,icol
   // swap indices when irow < icol to stay lower triangular
   // if map = -1 (non-EAM atom in pair hybrid):
   //   type2z2r is not used by non-opt
   //   but set type2z2r to 0 since accessed by opt
 
   int irow,icol;
   for (i = 1; i <= ntypes; i++) {
     for (j = 1; j <= ntypes; j++) {
       irow = map[i];
       icol = map[j];
       if (irow == -1 || icol == -1) {
         type2z2r[i][j] = 0;
         continue;
       }
       if (irow < icol) {
         irow = map[j];
         icol = map[i];
       }
       n = 0;
       for (m = 0; m < irow; m++) n += m + 1;
       n += icol;
       type2z2r[i][j] = n;
     }
   }
 }
diff --git a/src/GPU/pair_eam_fs_gpu.h b/src/GPU/pair_eam_fs_gpu.h
index d61d398a8..9ef436c3f 100644
--- a/src/GPU/pair_eam_fs_gpu.h
+++ b/src/GPU/pair_eam_fs_gpu.h
@@ -1,63 +1,77 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(eam/fs/gpu,PairEAMFSGPU)
 
 #else
 
 #ifndef LMP_PAIR_EAM_FS_GPU_H
 #define LMP_PAIR_EAM_FS_GPU_H
 
-#include "pair_eam_gpu.h"
+#include "pair_eam.h"
 
 namespace LAMMPS_NS {
 
-class PairEAMFSGPU : public PairEAMGPU {
+class PairEAMFSGPU : public PairEAM {  // base class is now PairEAM rather than PairEAMGPU
 public:
   PairEAMFSGPU(class LAMMPS *);
-  virtual ~PairEAMFSGPU() {}
+  virtual ~PairEAMFSGPU();  // defined in the .cpp file, where it calls eam_fs_gpu_clear()
   void coeff(int, char **);
+  void compute(int, int);  // two GPU library calls with a forward communication in between
+  void init_style();  // rejects newton pair, builds tables, initializes the GPU library
+  double single(int, int, int, int, double, double, double, double &);  // returns phi, sets the force factor
+  double memory_usage();  // Pair::memory_usage() plus eam_fs_gpu_bytes()
+
+  int pack_forward_comm(int, int *, double *, int, int *);  // packs fp_pinned values into the buffer
+  void unpack_forward_comm(int, int, double *);  // writes buffer values back into fp_pinned
+
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };  // values compared against gpu_mode; only GPU_FORCE is tested in the .cpp file
 
  protected:
   void read_file(char *);
   void file2array();
+
+  int gpu_mode;      // starts as GPU_FORCE; passed by reference to eam_fs_gpu_init()
+  double cpu_time;   // set to 0.0 in the constructor and passed to the GPU compute calls
+  void *fp_pinned;   // set by the GPU compute calls; read as float* or double* depending on fp_single
+  bool fp_single;    // false when eam_fs_gpu_init() reports fp_size == sizeof(double), true otherwise
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: No matching element in EAM potential file
 
 The EAM potential file does not contain elements that match the
 requested elements.
 
 E: Cannot open EAM potential file %s
 
 The specified EAM potential file cannot be opened.  Check that the
 path and name are correct.
 
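 E: Incorrect element names in EAM potential file
 
 The element names in the EAM file do not match those requested.
 
+E: Insufficient memory on accelerator
+
+There is insufficient memory on one of the devices specified for the gpu
+package.
+
+E: Cannot use newton pair with eam/fs/gpu pair style
+
+Self-explanatory.
+
 */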