diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile index 74cee0ee0..004b38764 100644 --- a/lib/gpu/Nvidia.makefile +++ b/lib/gpu/Nvidia.makefile @@ -1,764 +1,770 @@ CUDA = $(NVCC) $(CUDA_INCLUDE) $(CUDA_OPTS) -Icudpp_mini $(CUDA_ARCH) \ $(CUDA_PRECISION) CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_PRECISION) $(CUDA_INCLUDE) \ $(CUDPP_OPT) CUDA_LINK = $(CUDA_LIB) -lcudart BIN2C = $(CUDA_HOME)/bin/bin2c GPU_LIB = $(LIB_DIR)/libgpu.a # Headers for Geryon UCL_H = $(wildcard ./geryon/ucl*.h) NVC_H = $(wildcard ./geryon/nvc*.h) $(UCL_H) NVD_H = $(wildcard ./geryon/nvd*.h) $(UCL_H) lal_preprocessor.h # Headers for Pair Stuff PAIR_H = lal_atom.h lal_answer.h lal_neighbor_shared.h \ lal_neighbor.h lal_precision.h lal_device.h \ lal_balance.h lal_pppm.h ALL_H = $(NVD_H) $(PAIR_H) EXECS = $(BIN_DIR)/nvc_get_devices ifdef CUDPP_OPT CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \ $(OBJ_DIR)/cudpp_maximal_launch.o $(OBJ_DIR)/cudpp_plan_manager.o \ $(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o endif OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \ $(OBJ_DIR)/lal_neighbor.o $(OBJ_DIR)/lal_neighbor_shared.o \ $(OBJ_DIR)/lal_device.o $(OBJ_DIR)/lal_base_atomic.o \ $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_base_ellipsoid.o \ $(OBJ_DIR)/lal_base_dipole.o $(OBJ_DIR)/lal_base_three.o \ $(OBJ_DIR)/lal_base_dpd.o \ $(OBJ_DIR)/lal_pppm.o $(OBJ_DIR)/lal_pppm_ext.o \ $(OBJ_DIR)/lal_gayberne.o $(OBJ_DIR)/lal_gayberne_ext.o \ $(OBJ_DIR)/lal_re_squared.o $(OBJ_DIR)/lal_re_squared_ext.o \ $(OBJ_DIR)/lal_lj.o $(OBJ_DIR)/lal_lj_ext.o \ $(OBJ_DIR)/lal_lj96.o $(OBJ_DIR)/lal_lj96_ext.o \ $(OBJ_DIR)/lal_lj_expand.o $(OBJ_DIR)/lal_lj_expand_ext.o \ $(OBJ_DIR)/lal_lj_coul.o $(OBJ_DIR)/lal_lj_coul_ext.o \ $(OBJ_DIR)/lal_lj_coul_long.o $(OBJ_DIR)/lal_lj_coul_long_ext.o \ $(OBJ_DIR)/lal_lj_dsf.o $(OBJ_DIR)/lal_lj_dsf_ext.o \ $(OBJ_DIR)/lal_lj_class2_long.o $(OBJ_DIR)/lal_lj_class2_long_ext.o \ $(OBJ_DIR)/lal_coul_long.o $(OBJ_DIR)/lal_coul_long_ext.o \ $(OBJ_DIR)/lal_morse.o 
$(OBJ_DIR)/lal_morse_ext.o \ $(OBJ_DIR)/lal_charmm_long.o $(OBJ_DIR)/lal_charmm_long_ext.o \ $(OBJ_DIR)/lal_cg_cmm.o $(OBJ_DIR)/lal_cg_cmm_ext.o \ $(OBJ_DIR)/lal_cg_cmm_long.o $(OBJ_DIR)/lal_cg_cmm_long_ext.o \ $(OBJ_DIR)/lal_eam.o $(OBJ_DIR)/lal_eam_ext.o \ + $(OBJ_DIR)/lal_eam_fs_ext.o $(OBJ_DIR)/lal_eam_alloy_ext.o \ $(OBJ_DIR)/lal_buck.o $(OBJ_DIR)/lal_buck_ext.o \ $(OBJ_DIR)/lal_buck_coul.o $(OBJ_DIR)/lal_buck_coul_ext.o \ $(OBJ_DIR)/lal_buck_coul_long.o $(OBJ_DIR)/lal_buck_coul_long_ext.o \ $(OBJ_DIR)/lal_table.o $(OBJ_DIR)/lal_table_ext.o \ $(OBJ_DIR)/lal_yukawa.o $(OBJ_DIR)/lal_yukawa_ext.o \ $(OBJ_DIR)/lal_born.o $(OBJ_DIR)/lal_born_ext.o \ $(OBJ_DIR)/lal_born_coul_wolf.o $(OBJ_DIR)/lal_born_coul_wolf_ext.o \ $(OBJ_DIR)/lal_born_coul_long.o $(OBJ_DIR)/lal_born_coul_long_ext.o \ $(OBJ_DIR)/lal_dipole_lj.o $(OBJ_DIR)/lal_dipole_lj_ext.o \ $(OBJ_DIR)/lal_dipole_lj_sf.o $(OBJ_DIR)/lal_dipole_lj_sf_ext.o \ $(OBJ_DIR)/lal_colloid.o $(OBJ_DIR)/lal_colloid_ext.o \ $(OBJ_DIR)/lal_gauss.o $(OBJ_DIR)/lal_gauss_ext.o \ $(OBJ_DIR)/lal_yukawa_colloid.o $(OBJ_DIR)/lal_yukawa_colloid_ext.o \ $(OBJ_DIR)/lal_lj_coul_debye.o $(OBJ_DIR)/lal_lj_coul_debye_ext.o \ $(OBJ_DIR)/lal_coul_dsf.o $(OBJ_DIR)/lal_coul_dsf_ext.o \ $(OBJ_DIR)/lal_sw.o $(OBJ_DIR)/lal_sw_ext.o \ $(OBJ_DIR)/lal_beck.o $(OBJ_DIR)/lal_beck_ext.o \ $(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \ $(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \ $(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \ $(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \ $(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \ $(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \ $(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \ $(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \ $(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \ $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \ $(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \ 
$(OBJ_DIR)/neighbor_cpu.cubin $(OBJ_DIR)/neighbor_cpu_cubin.h \ $(OBJ_DIR)/neighbor_gpu.cubin $(OBJ_DIR)/neighbor_gpu_cubin.h \ $(OBJ_DIR)/pppm_f.cubin $(OBJ_DIR)/pppm_f_cubin.h \ $(OBJ_DIR)/pppm_d.cubin $(OBJ_DIR)/pppm_d_cubin.h \ $(OBJ_DIR)/ellipsoid_nbor.cubin $(OBJ_DIR)/ellipsoid_nbor_cubin.h \ $(OBJ_DIR)/gayberne.cubin $(OBJ_DIR)/gayberne_lj.cubin \ $(OBJ_DIR)/gayberne_cubin.h $(OBJ_DIR)/gayberne_lj_cubin.h \ $(OBJ_DIR)/re_squared.cubin $(OBJ_DIR)/re_squared_lj.cubin \ $(OBJ_DIR)/re_squared_cubin.h $(OBJ_DIR)/re_squared_lj_cubin.h \ $(OBJ_DIR)/lj.cubin $(OBJ_DIR)/lj_cubin.h \ $(OBJ_DIR)/lj96.cubin $(OBJ_DIR)/lj96_cubin.h \ $(OBJ_DIR)/lj_expand.cubin $(OBJ_DIR)/lj_expand_cubin.h \ $(OBJ_DIR)/lj_coul.cubin $(OBJ_DIR)/lj_coul_cubin.h \ $(OBJ_DIR)/lj_coul_long.cubin $(OBJ_DIR)/lj_coul_long_cubin.h \ $(OBJ_DIR)/lj_dsf.cubin $(OBJ_DIR)/lj_dsf_cubin.h \ $(OBJ_DIR)/lj_class2_long.cubin $(OBJ_DIR)/lj_class2_long_cubin.h \ $(OBJ_DIR)/coul_long.cubin $(OBJ_DIR)/coul_long_cubin.h \ $(OBJ_DIR)/morse.cubin $(OBJ_DIR)/morse_cubin.h \ $(OBJ_DIR)/charmm_long.cubin $(OBJ_DIR)/charmm_long_cubin.h \ $(OBJ_DIR)/cg_cmm.cubin $(OBJ_DIR)/cg_cmm_cubin.h \ $(OBJ_DIR)/cg_cmm_long.cubin $(OBJ_DIR)/cg_cmm_long_cubin.h \ $(OBJ_DIR)/eam.cubin $(OBJ_DIR)/eam_cubin.h \ $(OBJ_DIR)/buck.cubin $(OBJ_DIR)/buck_cubin.h \ $(OBJ_DIR)/buck_coul_long.cubin $(OBJ_DIR)/buck_coul_long_cubin.h \ $(OBJ_DIR)/buck_coul.cubin $(OBJ_DIR)/buck_coul_cubin.h \ $(OBJ_DIR)/table.cubin $(OBJ_DIR)/table_cubin.h \ $(OBJ_DIR)/yukawa.cubin $(OBJ_DIR)/yukawa_cubin.h \ $(OBJ_DIR)/born.cubin $(OBJ_DIR)/born_cubin.h \ $(OBJ_DIR)/born_coul_wolf.cubin $(OBJ_DIR)/born_coul_wolf_cubin.h \ $(OBJ_DIR)/born_coul_long.cubin $(OBJ_DIR)/born_coul_long_cubin.h \ $(OBJ_DIR)/dipole_lj.cubin $(OBJ_DIR)/dipole_lj_cubin.h \ $(OBJ_DIR)/dipole_lj_sf.cubin $(OBJ_DIR)/dipole_lj_sf_cubin.h \ $(OBJ_DIR)/colloid.cubin $(OBJ_DIR)/colloid_cubin.h \ $(OBJ_DIR)/gauss.cubin $(OBJ_DIR)/gauss_cubin.h \ $(OBJ_DIR)/yukawa_colloid.cubin 
$(OBJ_DIR)/yukawa_colloid_cubin.h \ $(OBJ_DIR)/lj_coul_debye.cubin $(OBJ_DIR)/lj_coul_debye_cubin.h \ $(OBJ_DIR)/coul_dsf.cubin $(OBJ_DIR)/coul_dsf_cubin.h \ $(OBJ_DIR)/sw.cubin $(OBJ_DIR)/sw_cubin.h \ $(OBJ_DIR)/beck.cubin $(OBJ_DIR)/beck_cubin.h \ $(OBJ_DIR)/mie.cubin $(OBJ_DIR)/mie_cubin.h \ $(OBJ_DIR)/soft.cubin $(OBJ_DIR)/soft_cubin.h \ $(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm_cubin.h \ $(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \ $(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \ $(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff_cubin.h \ - $(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \ + $(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \ $(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \ $(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \ $(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h - all: $(OBJ_DIR) $(GPU_LIB) $(EXECS) $(OBJ_DIR): mkdir -p $@ $(OBJ_DIR)/cudpp.o: cudpp_mini/cudpp.cpp $(CUDR) -o $@ -c cudpp_mini/cudpp.cpp -Icudpp_mini $(OBJ_DIR)/cudpp_plan.o: cudpp_mini/cudpp_plan.cpp $(CUDR) -o $@ -c cudpp_mini/cudpp_plan.cpp -Icudpp_mini $(OBJ_DIR)/cudpp_maximal_launch.o: cudpp_mini/cudpp_maximal_launch.cpp $(CUDR) -o $@ -c cudpp_mini/cudpp_maximal_launch.cpp -Icudpp_mini $(OBJ_DIR)/cudpp_plan_manager.o: cudpp_mini/cudpp_plan_manager.cpp $(CUDR) -o $@ -c cudpp_mini/cudpp_plan_manager.cpp -Icudpp_mini $(OBJ_DIR)/radixsort_app.cu_o: cudpp_mini/radixsort_app.cu $(CUDA) -o $@ -c cudpp_mini/radixsort_app.cu $(OBJ_DIR)/scan_app.cu_o: cudpp_mini/scan_app.cu $(CUDA) -o $@ -c cudpp_mini/scan_app.cu $(OBJ_DIR)/atom.cubin: lal_atom.cu lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_atom.cu $(OBJ_DIR)/atom_cubin.h: $(OBJ_DIR)/atom.cubin $(BIN2C) -c -n atom $(OBJ_DIR)/atom.cubin > $(OBJ_DIR)/atom_cubin.h $(OBJ_DIR)/lal_atom.o: lal_atom.cpp lal_atom.h $(NVD_H) $(OBJ_DIR)/atom_cubin.h $(CUDR) -o $@ -c lal_atom.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_ans.o: lal_answer.cpp lal_answer.h $(NVD_H) $(CUDR) -o $@ -c lal_answer.cpp -I$(OBJ_DIR) 
$(OBJ_DIR)/neighbor_cpu.cubin: lal_neighbor_cpu.cu lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_neighbor_cpu.cu $(OBJ_DIR)/neighbor_cpu_cubin.h: $(OBJ_DIR)/neighbor_cpu.cubin $(BIN2C) -c -n neighbor_cpu $(OBJ_DIR)/neighbor_cpu.cubin > $(OBJ_DIR)/neighbor_cpu_cubin.h $(OBJ_DIR)/neighbor_gpu.cubin: lal_neighbor_gpu.cu lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_neighbor_gpu.cu $(OBJ_DIR)/neighbor_gpu_cubin.h: $(OBJ_DIR)/neighbor_gpu.cubin $(BIN2C) -c -n neighbor_gpu $(OBJ_DIR)/neighbor_gpu.cubin > $(OBJ_DIR)/neighbor_gpu_cubin.h $(OBJ_DIR)/lal_neighbor_shared.o: lal_neighbor_shared.cpp lal_neighbor_shared.h $(OBJ_DIR)/neighbor_cpu_cubin.h $(OBJ_DIR)/neighbor_gpu_cubin.h $(NVD_H) $(CUDR) -o $@ -c lal_neighbor_shared.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_neighbor.o: lal_neighbor.cpp lal_neighbor.h lal_neighbor_shared.h $(NVD_H) $(CUDR) -o $@ -c lal_neighbor.cpp -I$(OBJ_DIR) $(OBJ_DIR)/device.cubin: lal_device.cu lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_device.cu $(OBJ_DIR)/device_cubin.h: $(OBJ_DIR)/device.cubin $(BIN2C) -c -n device $(OBJ_DIR)/device.cubin > $(OBJ_DIR)/device_cubin.h $(OBJ_DIR)/lal_device.o: lal_device.cpp lal_device.h $(ALL_H) $(OBJ_DIR)/device_cubin.h $(CUDR) -o $@ -c lal_device.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_base_atomic.o: $(ALL_H) lal_base_atomic.h lal_base_atomic.cpp $(CUDR) -o $@ -c lal_base_atomic.cpp $(OBJ_DIR)/lal_base_charge.o: $(ALL_H) lal_base_charge.h lal_base_charge.cpp $(CUDR) -o $@ -c lal_base_charge.cpp $(OBJ_DIR)/lal_base_ellipsoid.o: $(ALL_H) lal_base_ellipsoid.h lal_base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_cubin.h $(CUDR) -o $@ -c lal_base_ellipsoid.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_base_dipole.o: $(ALL_H) lal_base_dipole.h lal_base_dipole.cpp $(CUDR) -o $@ -c lal_base_dipole.cpp $(OBJ_DIR)/lal_base_three.o: $(ALL_H) lal_base_three.h lal_base_three.cpp $(CUDR) -o $@ -c lal_base_three.cpp $(OBJ_DIR)/lal_base_dpd.o: $(ALL_H) lal_base_dpd.h lal_base_dpd.cpp $(CUDR) -o $@ -c lal_base_dpd.cpp 
$(OBJ_DIR)/pppm_f.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ lal_pppm.cu $(OBJ_DIR)/pppm_f_cubin.h: $(OBJ_DIR)/pppm_f.cubin $(BIN2C) -c -n pppm_f $(OBJ_DIR)/pppm_f.cubin > $(OBJ_DIR)/pppm_f_cubin.h $(OBJ_DIR)/pppm_d.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -Dgrdtyp=double -Dgrdtyp4=double4 -o $@ lal_pppm.cu $(OBJ_DIR)/pppm_d_cubin.h: $(OBJ_DIR)/pppm_d.cubin $(BIN2C) -c -n pppm_d $(OBJ_DIR)/pppm_d.cubin > $(OBJ_DIR)/pppm_d_cubin.h $(OBJ_DIR)/lal_pppm.o: $(ALL_H) lal_pppm.h lal_pppm.cpp $(OBJ_DIR)/pppm_f_cubin.h $(OBJ_DIR)/pppm_d_cubin.h $(CUDR) -o $@ -c lal_pppm.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_pppm_ext.o: $(ALL_H) lal_pppm.h lal_pppm_ext.cpp $(CUDR) -o $@ -c lal_pppm_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/ellipsoid_nbor.cubin: lal_ellipsoid_nbor.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_ellipsoid_nbor.cu $(OBJ_DIR)/ellipsoid_nbor_cubin.h: $(OBJ_DIR)/ellipsoid_nbor.cubin $(BIN2C) -c -n ellipsoid_nbor $(OBJ_DIR)/ellipsoid_nbor.cubin > $(OBJ_DIR)/ellipsoid_nbor_cubin.h $(OBJ_DIR)/gayberne.cubin: lal_gayberne.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_gayberne.cu $(OBJ_DIR)/gayberne_lj.cubin: lal_gayberne_lj.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_gayberne_lj.cu $(OBJ_DIR)/gayberne_cubin.h: $(OBJ_DIR)/gayberne.cubin $(BIN2C) -c -n gayberne $(OBJ_DIR)/gayberne.cubin > $(OBJ_DIR)/gayberne_cubin.h $(OBJ_DIR)/gayberne_lj_cubin.h: $(OBJ_DIR)/gayberne_lj.cubin $(BIN2C) -c -n gayberne_lj $(OBJ_DIR)/gayberne_lj.cubin > $(OBJ_DIR)/gayberne_lj_cubin.h $(OBJ_DIR)/lal_gayberne.o: $(ALL_H) lal_gayberne.h lal_gayberne.cpp $(OBJ_DIR)/gayberne_cubin.h $(OBJ_DIR)/gayberne_lj_cubin.h $(OBJ_DIR)/lal_base_ellipsoid.o $(CUDR) -o $@ -c lal_gayberne.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/lal_gayberne.o 
lal_gayberne_ext.cpp $(CUDR) -o $@ -c lal_gayberne_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/re_squared.cubin: lal_re_squared.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_re_squared.cu $(OBJ_DIR)/re_squared_lj.cubin: lal_re_squared_lj.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_re_squared_lj.cu $(OBJ_DIR)/re_squared_cubin.h: $(OBJ_DIR)/re_squared.cubin $(BIN2C) -c -n re_squared $(OBJ_DIR)/re_squared.cubin > $(OBJ_DIR)/re_squared_cubin.h $(OBJ_DIR)/re_squared_lj_cubin.h: $(OBJ_DIR)/re_squared_lj.cubin $(BIN2C) -c -n re_squared_lj $(OBJ_DIR)/re_squared_lj.cubin > $(OBJ_DIR)/re_squared_lj_cubin.h $(OBJ_DIR)/lal_re_squared.o: $(ALL_H) lal_re_squared.h lal_re_squared.cpp $(OBJ_DIR)/re_squared_cubin.h $(OBJ_DIR)/re_squared_lj_cubin.h $(OBJ_DIR)/lal_base_ellipsoid.o $(CUDR) -o $@ -c lal_re_squared.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/lal_re_squared.o lal_re_squared_ext.cpp $(CUDR) -o $@ -c lal_re_squared_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj.cubin: lal_lj.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj.cu $(OBJ_DIR)/lj_cubin.h: $(OBJ_DIR)/lj.cubin $(OBJ_DIR)/lj.cubin $(BIN2C) -c -n lj $(OBJ_DIR)/lj.cubin > $(OBJ_DIR)/lj_cubin.h $(OBJ_DIR)/lal_lj.o: $(ALL_H) lal_lj.h lal_lj.cpp $(OBJ_DIR)/lj_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_lj.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_ext.o: $(ALL_H) lal_lj.h lal_lj_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_lj_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_coul.cubin: lal_lj_coul.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_coul.cu $(OBJ_DIR)/lj_coul_cubin.h: $(OBJ_DIR)/lj_coul.cubin $(OBJ_DIR)/lj_coul.cubin $(BIN2C) -c -n lj_coul $(OBJ_DIR)/lj_coul.cubin > $(OBJ_DIR)/lj_coul_cubin.h $(OBJ_DIR)/lal_lj_coul.o: $(ALL_H) lal_lj_coul.h lal_lj_coul.cpp $(OBJ_DIR)/lj_coul_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_lj_coul.cpp 
-I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_coul_ext.o: $(ALL_H) lal_lj_coul.h lal_lj_coul_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_lj_coul_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_class2_long.cubin: lal_lj_class2_long.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_class2_long.cu $(OBJ_DIR)/lj_class2_long_cubin.h: $(OBJ_DIR)/lj_class2_long.cubin $(OBJ_DIR)/lj_class2_long.cubin $(BIN2C) -c -n lj_class2_long $(OBJ_DIR)/lj_class2_long.cubin > $(OBJ_DIR)/lj_class2_long_cubin.h $(OBJ_DIR)/lal_lj_class2_long.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_lj_class2_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_class2_long_ext.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_lj_class2_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/coul_long.cubin: lal_coul_long.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul_long.cu $(OBJ_DIR)/coul_long_cubin.h: $(OBJ_DIR)/coul_long.cubin $(OBJ_DIR)/coul_long.cubin $(BIN2C) -c -n coul_long $(OBJ_DIR)/coul_long.cubin > $(OBJ_DIR)/coul_long_cubin.h $(OBJ_DIR)/lal_coul_long.o: $(ALL_H) lal_coul_long.h lal_coul_long.cpp $(OBJ_DIR)/coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_coul_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_coul_long_ext.o: $(ALL_H) lal_coul_long.h lal_coul_long_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_coul_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_coul_long.cubin: lal_lj_coul_long.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_coul_long.cu $(OBJ_DIR)/lj_coul_long_cubin.h: $(OBJ_DIR)/lj_coul_long.cubin $(OBJ_DIR)/lj_coul_long.cubin $(BIN2C) -c -n lj_coul_long $(OBJ_DIR)/lj_coul_long.cubin > $(OBJ_DIR)/lj_coul_long_cubin.h $(OBJ_DIR)/lal_lj_coul_long.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long.cpp $(OBJ_DIR)/lj_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_lj_coul_long.cpp 
-I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_coul_long_ext.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_lj_coul_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_dsf.cubin: lal_lj_dsf.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_dsf.cu $(OBJ_DIR)/lj_dsf_cubin.h: $(OBJ_DIR)/lj_dsf.cubin $(OBJ_DIR)/lj_dsf.cubin $(BIN2C) -c -n lj_dsf $(OBJ_DIR)/lj_dsf.cubin > $(OBJ_DIR)/lj_dsf_cubin.h $(OBJ_DIR)/lal_lj_dsf.o: $(ALL_H) lal_lj_dsf.h lal_lj_dsf.cpp $(OBJ_DIR)/lj_dsf_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_lj_dsf.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_dsf_ext.o: $(ALL_H) lal_lj_dsf.h lal_lj_dsf_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_lj_dsf_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/morse.cubin: lal_morse.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_morse.cu $(OBJ_DIR)/morse_cubin.h: $(OBJ_DIR)/morse.cubin $(OBJ_DIR)/morse.cubin $(BIN2C) -c -n morse $(OBJ_DIR)/morse.cubin > $(OBJ_DIR)/morse_cubin.h $(OBJ_DIR)/lal_morse.o: $(ALL_H) lal_morse.h lal_morse.cpp $(OBJ_DIR)/morse_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_morse.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_morse_ext.o: $(ALL_H) lal_morse.h lal_morse_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_morse_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/charmm_long.cubin: lal_charmm_long.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_charmm_long.cu $(OBJ_DIR)/charmm_long_cubin.h: $(OBJ_DIR)/charmm_long.cubin $(OBJ_DIR)/charmm_long.cubin $(BIN2C) -c -n charmm_long $(OBJ_DIR)/charmm_long.cubin > $(OBJ_DIR)/charmm_long_cubin.h $(OBJ_DIR)/lal_charmm_long.o: $(ALL_H) lal_charmm_long.h lal_charmm_long.cpp $(OBJ_DIR)/charmm_long_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_charmm_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_charmm_long_ext.o: $(ALL_H) lal_charmm_long.h lal_charmm_long_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_charmm_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj96.cubin: lal_lj96.cu 
lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj96.cu $(OBJ_DIR)/lj96_cubin.h: $(OBJ_DIR)/lj96.cubin $(OBJ_DIR)/lj96.cubin $(BIN2C) -c -n lj96 $(OBJ_DIR)/lj96.cubin > $(OBJ_DIR)/lj96_cubin.h $(OBJ_DIR)/lal_lj96.o: $(ALL_H) lal_lj96.h lal_lj96.cpp $(OBJ_DIR)/lj96_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_lj96.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj96_ext.o: $(ALL_H) lal_lj96.h lal_lj96_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_lj96_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_expand.cubin: lal_lj_expand.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_expand.cu $(OBJ_DIR)/lj_expand_cubin.h: $(OBJ_DIR)/lj_expand.cubin $(OBJ_DIR)/lj_expand.cubin $(BIN2C) -c -n lj_expand $(OBJ_DIR)/lj_expand.cubin > $(OBJ_DIR)/lj_expand_cubin.h $(OBJ_DIR)/lal_lj_expand.o: $(ALL_H) lal_lj_expand.h lal_lj_expand.cpp $(OBJ_DIR)/lj_expand_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_lj_expand.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_expand_ext.o: $(ALL_H) lal_lj_expand.h lal_lj_expand_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_lj_expand_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/cg_cmm.cubin: lal_cg_cmm.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_cg_cmm.cu $(OBJ_DIR)/cg_cmm_cubin.h: $(OBJ_DIR)/cg_cmm.cubin $(OBJ_DIR)/cg_cmm.cubin $(BIN2C) -c -n cg_cmm $(OBJ_DIR)/cg_cmm.cubin > $(OBJ_DIR)/cg_cmm_cubin.h $(OBJ_DIR)/lal_cg_cmm.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm.cpp $(OBJ_DIR)/cg_cmm_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_cg_cmm.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_cg_cmm_ext.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_cg_cmm_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/cg_cmm_long.cubin: lal_cg_cmm_long.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_cg_cmm_long.cu $(OBJ_DIR)/cg_cmm_long_cubin.h: $(OBJ_DIR)/cg_cmm_long.cubin $(OBJ_DIR)/cg_cmm_long.cubin $(BIN2C) -c -n cg_cmm_long $(OBJ_DIR)/cg_cmm_long.cubin > 
$(OBJ_DIR)/cg_cmm_long_cubin.h $(OBJ_DIR)/lal_cg_cmm_long.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long.cpp $(OBJ_DIR)/cg_cmm_long_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_cg_cmm_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_cg_cmm_long_ext.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_cg_cmm_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/eam.cubin: lal_eam.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_eam.cu $(OBJ_DIR)/eam_cubin.h: $(OBJ_DIR)/eam.cubin $(OBJ_DIR)/eam.cubin $(BIN2C) -c -n eam $(OBJ_DIR)/eam.cubin > $(OBJ_DIR)/eam_cubin.h $(OBJ_DIR)/lal_eam.o: $(ALL_H) lal_eam.h lal_eam.cpp $(OBJ_DIR)/eam_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_eam.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_eam_ext.o: $(ALL_H) lal_eam.h lal_eam_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_eam_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/lal_eam_fs_ext.o: $(ALL_H) lal_eam.h lal_eam_fs_ext.cpp lal_base_atomic.h + $(CUDR) -o $@ -c lal_eam_fs_ext.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lal_eam_alloy_ext.o: $(ALL_H) lal_eam.h lal_eam_alloy_ext.cpp lal_base_atomic.h + $(CUDR) -o $@ -c lal_eam_alloy_ext.cpp -I$(OBJ_DIR) + $(OBJ_DIR)/buck.cubin: lal_buck.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_buck.cu $(OBJ_DIR)/buck_cubin.h: $(OBJ_DIR)/buck.cubin $(OBJ_DIR)/buck.cubin $(BIN2C) -c -n buck $(OBJ_DIR)/buck.cubin > $(OBJ_DIR)/buck_cubin.h $(OBJ_DIR)/lal_buck.o: $(ALL_H) lal_buck.h lal_buck.cpp $(OBJ_DIR)/buck_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_buck.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_buck_ext.o: $(ALL_H) lal_buck.h lal_buck_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_buck_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/buck_coul.cubin: lal_buck_coul.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_buck_coul.cu $(OBJ_DIR)/buck_coul_cubin.h: $(OBJ_DIR)/buck_coul.cubin $(OBJ_DIR)/buck_coul.cubin $(BIN2C) -c -n buck_coul $(OBJ_DIR)/buck_coul.cubin > 
$(OBJ_DIR)/buck_coul_cubin.h $(OBJ_DIR)/lal_buck_coul.o: $(ALL_H) lal_buck_coul.h lal_buck_coul.cpp $(OBJ_DIR)/buck_coul_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_buck_coul.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_buck_coul_ext.o: $(ALL_H) lal_buck_coul.h lal_buck_coul_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_buck_coul_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/buck_coul_long.cubin: lal_buck_coul_long.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_buck_coul_long.cu $(OBJ_DIR)/buck_coul_long_cubin.h: $(OBJ_DIR)/buck_coul_long.cubin $(OBJ_DIR)/buck_coul_long.cubin $(BIN2C) -c -n buck_coul_long $(OBJ_DIR)/buck_coul_long.cubin > $(OBJ_DIR)/buck_coul_long_cubin.h $(OBJ_DIR)/lal_buck_coul_long.o: $(ALL_H) lal_buck_coul_long.h lal_buck_coul_long.cpp $(OBJ_DIR)/buck_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_buck_coul_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_buck_coul_long_ext.o: $(ALL_H) lal_buck_coul_long.h lal_buck_coul_long_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_buck_coul_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/table.cubin: lal_table.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_table.cu $(OBJ_DIR)/table_cubin.h: $(OBJ_DIR)/table.cubin $(OBJ_DIR)/table.cubin $(BIN2C) -c -n table $(OBJ_DIR)/table.cubin > $(OBJ_DIR)/table_cubin.h $(OBJ_DIR)/lal_table.o: $(ALL_H) lal_table.h lal_table.cpp $(OBJ_DIR)/table_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_table.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_table_ext.o: $(ALL_H) lal_table.h lal_table_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_table_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/yukawa.cubin: lal_yukawa.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_yukawa.cu $(OBJ_DIR)/yukawa_cubin.h: $(OBJ_DIR)/yukawa.cubin $(OBJ_DIR)/yukawa.cubin $(BIN2C) -c -n yukawa $(OBJ_DIR)/yukawa.cubin > $(OBJ_DIR)/yukawa_cubin.h $(OBJ_DIR)/lal_yukawa.o: $(ALL_H) lal_yukawa.h lal_yukawa.cpp $(OBJ_DIR)/yukawa_cubin.h 
$(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_yukawa.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_yukawa_ext.o: $(ALL_H) lal_yukawa.h lal_yukawa_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_yukawa_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/born.cubin: lal_born.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_born.cu $(OBJ_DIR)/born_cubin.h: $(OBJ_DIR)/born.cubin $(OBJ_DIR)/born.cubin $(BIN2C) -c -n born $(OBJ_DIR)/born.cubin > $(OBJ_DIR)/born_cubin.h $(OBJ_DIR)/lal_born.o: $(ALL_H) lal_born.h lal_born.cpp $(OBJ_DIR)/born_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_born.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_born_ext.o: $(ALL_H) lal_born.h lal_born_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_born_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/born_coul_wolf.cubin: lal_born_coul_wolf.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_born_coul_wolf.cu $(OBJ_DIR)/born_coul_wolf_cubin.h: $(OBJ_DIR)/born_coul_wolf.cubin $(OBJ_DIR)/born_coul_wolf.cubin $(BIN2C) -c -n born_coul_wolf $(OBJ_DIR)/born_coul_wolf.cubin > $(OBJ_DIR)/born_coul_wolf_cubin.h $(OBJ_DIR)/lal_born_coul_wolf.o: $(ALL_H) lal_born_coul_wolf.h lal_born_coul_wolf.cpp $(OBJ_DIR)/born_coul_wolf_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_born_coul_wolf.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_born_coul_wolf_ext.o: $(ALL_H) lal_born_coul_wolf.h lal_born_coul_wolf_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_born_coul_wolf_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/born_coul_long.cubin: lal_born_coul_long.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_born_coul_long.cu $(OBJ_DIR)/born_coul_long_cubin.h: $(OBJ_DIR)/born_coul_long.cubin $(OBJ_DIR)/born_coul_long.cubin $(BIN2C) -c -n born_coul_long $(OBJ_DIR)/born_coul_long.cubin > $(OBJ_DIR)/born_coul_long_cubin.h $(OBJ_DIR)/lal_born_coul_long.o: $(ALL_H) lal_born_coul_long.h lal_born_coul_long.cpp $(OBJ_DIR)/born_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_born_coul_long.cpp 
-I$(OBJ_DIR) $(OBJ_DIR)/lal_born_coul_long_ext.o: $(ALL_H) lal_born_coul_long.h lal_born_coul_long_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_born_coul_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/dipole_lj.cubin: lal_dipole_lj.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_dipole_lj.cu $(OBJ_DIR)/dipole_lj_cubin.h: $(OBJ_DIR)/dipole_lj.cubin $(OBJ_DIR)/dipole_lj.cubin $(BIN2C) -c -n dipole_lj $(OBJ_DIR)/dipole_lj.cubin > $(OBJ_DIR)/dipole_lj_cubin.h $(OBJ_DIR)/lal_dipole_lj.o: $(ALL_H) lal_dipole_lj.h lal_dipole_lj.cpp $(OBJ_DIR)/dipole_lj_cubin.h $(OBJ_DIR)/lal_base_dipole.o $(CUDR) -o $@ -c lal_dipole_lj.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_dipole_lj_ext.o: $(ALL_H) lal_dipole_lj.h lal_dipole_lj_ext.cpp lal_base_dipole.h $(CUDR) -o $@ -c lal_dipole_lj_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/dipole_lj_sf.cubin: lal_dipole_lj_sf.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_dipole_lj_sf.cu $(OBJ_DIR)/dipole_lj_sf_cubin.h: $(OBJ_DIR)/dipole_lj_sf.cubin $(OBJ_DIR)/dipole_lj_sf.cubin $(BIN2C) -c -n dipole_lj_sf $(OBJ_DIR)/dipole_lj_sf.cubin > $(OBJ_DIR)/dipole_lj_sf_cubin.h $(OBJ_DIR)/lal_dipole_lj_sf.o: $(ALL_H) lal_dipole_lj_sf.h lal_dipole_lj_sf.cpp $(OBJ_DIR)/dipole_lj_sf_cubin.h $(OBJ_DIR)/lal_base_dipole.o $(CUDR) -o $@ -c lal_dipole_lj_sf.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_dipole_lj_sf_ext.o: $(ALL_H) lal_dipole_lj_sf.h lal_dipole_lj_sf_ext.cpp lal_base_dipole.h $(CUDR) -o $@ -c lal_dipole_lj_sf_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/colloid.cubin: lal_colloid.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_colloid.cu $(OBJ_DIR)/colloid_cubin.h: $(OBJ_DIR)/colloid.cubin $(OBJ_DIR)/colloid.cubin $(BIN2C) -c -n colloid $(OBJ_DIR)/colloid.cubin > $(OBJ_DIR)/colloid_cubin.h $(OBJ_DIR)/lal_colloid.o: $(ALL_H) lal_colloid.h lal_colloid.cpp $(OBJ_DIR)/colloid_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_colloid.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_colloid_ext.o: $(ALL_H) lal_colloid.h 
lal_colloid_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_colloid_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/gauss.cubin: lal_gauss.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_gauss.cu $(OBJ_DIR)/gauss_cubin.h: $(OBJ_DIR)/gauss.cubin $(OBJ_DIR)/gauss.cubin $(BIN2C) -c -n gauss $(OBJ_DIR)/gauss.cubin > $(OBJ_DIR)/gauss_cubin.h $(OBJ_DIR)/lal_gauss.o: $(ALL_H) lal_gauss.h lal_gauss.cpp $(OBJ_DIR)/gauss_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_gauss.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_gauss_ext.o: $(ALL_H) lal_gauss.h lal_gauss_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_gauss_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/yukawa_colloid.cubin: lal_yukawa_colloid.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_yukawa_colloid.cu $(OBJ_DIR)/yukawa_colloid_cubin.h: $(OBJ_DIR)/yukawa_colloid.cubin $(OBJ_DIR)/yukawa_colloid.cubin $(BIN2C) -c -n yukawa_colloid $(OBJ_DIR)/yukawa_colloid.cubin > $(OBJ_DIR)/yukawa_colloid_cubin.h $(OBJ_DIR)/lal_yukawa_colloid.o: $(ALL_H) lal_yukawa_colloid.h lal_yukawa_colloid.cpp $(OBJ_DIR)/yukawa_colloid_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_yukawa_colloid.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_yukawa_colloid_ext.o: $(ALL_H) lal_yukawa_colloid.h lal_yukawa_colloid_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_yukawa_colloid_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_coul_debye.cubin: lal_lj_coul_debye.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_coul_debye.cu $(OBJ_DIR)/lj_coul_debye_cubin.h: $(OBJ_DIR)/lj_coul_debye.cubin $(OBJ_DIR)/lj_coul_debye.cubin $(BIN2C) -c -n lj_coul_debye $(OBJ_DIR)/lj_coul_debye.cubin > $(OBJ_DIR)/lj_coul_debye_cubin.h $(OBJ_DIR)/lal_lj_coul_debye.o: $(ALL_H) lal_lj_coul_debye.h lal_lj_coul_debye.cpp $(OBJ_DIR)/lj_coul_debye_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_lj_coul_debye.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_coul_debye_ext.o: $(ALL_H) lal_lj_coul_debye.h lal_lj_coul_debye_ext.cpp lal_base_charge.h 
$(CUDR) -o $@ -c lal_lj_coul_debye_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/coul_dsf.cubin: lal_coul_dsf.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul_dsf.cu $(OBJ_DIR)/coul_dsf_cubin.h: $(OBJ_DIR)/coul_dsf.cubin $(OBJ_DIR)/coul_dsf.cubin $(BIN2C) -c -n coul_dsf $(OBJ_DIR)/coul_dsf.cubin > $(OBJ_DIR)/coul_dsf_cubin.h $(OBJ_DIR)/lal_coul_dsf.o: $(ALL_H) lal_coul_dsf.h lal_coul_dsf.cpp $(OBJ_DIR)/coul_dsf_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_coul_dsf.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_coul_dsf_ext.o: $(ALL_H) lal_coul_dsf.h lal_coul_dsf_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_coul_dsf_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/sw.cubin: lal_sw.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_sw.cu $(OBJ_DIR)/sw_cubin.h: $(OBJ_DIR)/sw.cubin $(OBJ_DIR)/sw.cubin $(BIN2C) -c -n sw $(OBJ_DIR)/sw.cubin > $(OBJ_DIR)/sw_cubin.h $(OBJ_DIR)/lal_sw.o: $(ALL_H) lal_sw.h lal_sw.cpp $(OBJ_DIR)/sw_cubin.h $(OBJ_DIR)/lal_base_three.o $(CUDR) -o $@ -c lal_sw.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_sw_ext.o: $(ALL_H) lal_sw.h lal_sw_ext.cpp lal_base_three.h $(CUDR) -o $@ -c lal_sw_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/beck.cubin: lal_beck.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_beck.cu $(OBJ_DIR)/beck_cubin.h: $(OBJ_DIR)/beck.cubin $(OBJ_DIR)/beck.cubin $(BIN2C) -c -n beck $(OBJ_DIR)/beck.cubin > $(OBJ_DIR)/beck_cubin.h $(OBJ_DIR)/lal_beck.o: $(ALL_H) lal_beck.h lal_beck.cpp $(OBJ_DIR)/beck_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_beck.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_beck_ext.o: $(ALL_H) lal_beck.h lal_beck_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_beck_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/mie.cubin: lal_mie.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_mie.cu $(OBJ_DIR)/mie_cubin.h: $(OBJ_DIR)/mie.cubin $(OBJ_DIR)/mie.cubin $(BIN2C) -c -n mie $(OBJ_DIR)/mie.cubin > $(OBJ_DIR)/mie_cubin.h $(OBJ_DIR)/lal_mie.o: $(ALL_H) lal_mie.h lal_mie.cpp 
$(OBJ_DIR)/mie_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_mie.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_mie_ext.o: $(ALL_H) lal_mie.h lal_mie_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_mie_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/soft.cubin: lal_soft.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_soft.cu $(OBJ_DIR)/soft_cubin.h: $(OBJ_DIR)/soft.cubin $(OBJ_DIR)/soft.cubin $(BIN2C) -c -n soft $(OBJ_DIR)/soft.cubin > $(OBJ_DIR)/soft_cubin.h $(OBJ_DIR)/lal_soft.o: $(ALL_H) lal_soft.h lal_soft.cpp $(OBJ_DIR)/soft_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_soft.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_soft_ext.o: $(ALL_H) lal_soft.h lal_soft_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_soft_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_coul_msm.cubin: lal_lj_coul_msm.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_coul_msm.cu $(OBJ_DIR)/lj_coul_msm_cubin.h: $(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm.cubin $(BIN2C) -c -n lj_coul_msm $(OBJ_DIR)/lj_coul_msm.cubin > $(OBJ_DIR)/lj_coul_msm_cubin.h $(OBJ_DIR)/lal_lj_coul_msm.o: $(ALL_H) lal_lj_coul_msm.h lal_lj_coul_msm.cpp $(OBJ_DIR)/lj_coul_msm_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_lj_coul_msm.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_coul_msm_ext.o: $(ALL_H) lal_lj_coul_msm.h lal_lj_coul_msm_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_lj_coul_msm_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_gromacs.cubin: lal_lj_gromacs.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_gromacs.cu $(OBJ_DIR)/lj_gromacs_cubin.h: $(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs.cubin $(BIN2C) -c -n lj_gromacs $(OBJ_DIR)/lj_gromacs.cubin > $(OBJ_DIR)/lj_gromacs_cubin.h $(OBJ_DIR)/lal_lj_gromacs.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs.cpp $(OBJ_DIR)/lj_gromacs_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_lj_gromacs.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_gromacs_ext.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs_ext.cpp 
lal_base_atomic.h $(CUDR) -o $@ -c lal_lj_gromacs_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/dpd.cubin: lal_dpd.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_dpd.cu $(OBJ_DIR)/dpd_cubin.h: $(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd.cubin $(BIN2C) -c -n dpd $(OBJ_DIR)/dpd.cubin > $(OBJ_DIR)/dpd_cubin.h $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OBJ_DIR)/lal_base_dpd.o $(CUDR) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h $(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/tersoff.cubin: lal_tersoff.cu lal_precision.h lal_tersoff_extra.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff.cu $(OBJ_DIR)/tersoff_cubin.h: $(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff.cubin $(BIN2C) -c -n tersoff $(OBJ_DIR)/tersoff.cubin > $(OBJ_DIR)/tersoff_cubin.h $(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/tersoff_cubin.h $(OBJ_DIR)/lal_base_three.o $(CUDR) -o $@ -c lal_tersoff.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h $(CUDR) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul.cu $(OBJ_DIR)/coul_cubin.h: $(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul.cubin $(BIN2C) -c -n coul $(OBJ_DIR)/coul.cubin > $(OBJ_DIR)/coul_cubin.h $(OBJ_DIR)/lal_coul.o: $(ALL_H) lal_coul.h lal_coul.cpp $(OBJ_DIR)/coul_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_coul.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_coul_ext.o: $(ALL_H) lal_coul.h lal_coul_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_coul_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/coul_debye.cubin: lal_coul_debye.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul_debye.cu $(OBJ_DIR)/coul_debye_cubin.h: $(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye.cubin $(BIN2C) -c -n coul_debye $(OBJ_DIR)/coul_debye.cubin > 
$(OBJ_DIR)/coul_debye_cubin.h $(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ_DIR)/coul_debye_cubin.h $(OBJ_DIR)/lal_base_charge.o $(CUDR) -o $@ -c lal_coul_debye.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/zbl.cubin: lal_zbl.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_zbl.cu $(OBJ_DIR)/zbl_cubin.h: $(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl.cubin $(BIN2C) -c -n zbl $(OBJ_DIR)/zbl.cubin > $(OBJ_DIR)/zbl_cubin.h $(OBJ_DIR)/lal_zbl.o: $(ALL_H) lal_zbl.h lal_zbl.cpp $(OBJ_DIR)/zbl_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_zbl.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_zbl_ext.o: $(ALL_H) lal_zbl.h lal_zbl_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_zbl_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_cubic.cubin: lal_lj_cubic.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_cubic.cu $(OBJ_DIR)/lj_cubic_cubin.h: $(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic.cubin $(BIN2C) -c -n lj_cubic $(OBJ_DIR)/lj_cubic.cubin > $(OBJ_DIR)/lj_cubic_cubin.h $(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/lj_cubic_cubin.h $(OBJ_DIR)/lal_base_atomic.o $(CUDR) -o $@ -c lal_lj_cubic.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h $(CUDR) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR) $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H) $(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda $(GPU_LIB): $(OBJS) $(CUDPP) $(AR) -crusv $(GPU_LIB) $(OBJS) $(CUDPP) @cp $(EXTRAMAKE) Makefile.lammps clean: rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CBNS) *.linkinfo veryclean: clean rm -rf *~ *.linkinfo cleanlib: rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CBNS) *.linkinfo diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile index 8f3f4f017..d7bae0f49 100644 
--- a/lib/gpu/Opencl.makefile +++ b/lib/gpu/Opencl.makefile @@ -1,556 +1,563 @@ OCL = $(OCL_CPP) $(OCL_PREC) $(OCL_TUNE) -DUSE_OPENCL OCL_LIB = $(LIB_DIR)/libgpu.a # Headers for Geryon UCL_H = $(wildcard ./geryon/ucl*.h) OCL_H = $(wildcard ./geryon/ocl*.h) $(UCL_H) # Headers for Pair Stuff PAIR_H = lal_atom.h lal_answer.h lal_neighbor_shared.h \ lal_neighbor.h lal_precision.h lal_device.h \ lal_balance.h lal_pppm.h # Headers for Preprocessor/Auxiliary Functions PRE1_H = lal_preprocessor.h lal_aux_fun1.h ALL_H = $(OCL_H) $(PAIR_H) EXECS = $(BIN_DIR)/ocl_get_devices OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \ $(OBJ_DIR)/lal_neighbor_shared.o $(OBJ_DIR)/lal_neighbor.o \ $(OBJ_DIR)/lal_device.o $(OBJ_DIR)/lal_base_atomic.o \ $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_base_ellipsoid.o \ $(OBJ_DIR)/lal_base_dipole.o $(OBJ_DIR)/lal_base_three.o \ $(OBJ_DIR)/lal_base_dpd.o \ $(OBJ_DIR)/lal_pppm.o $(OBJ_DIR)/lal_pppm_ext.o \ $(OBJ_DIR)/lal_gayberne.o $(OBJ_DIR)/lal_gayberne_ext.o \ $(OBJ_DIR)/lal_re_squared.o $(OBJ_DIR)/lal_re_squared_ext.o \ $(OBJ_DIR)/lal_lj.o $(OBJ_DIR)/lal_lj_ext.o \ $(OBJ_DIR)/lal_lj96.o $(OBJ_DIR)/lal_lj96_ext.o \ $(OBJ_DIR)/lal_lj_expand.o $(OBJ_DIR)/lal_lj_expand_ext.o \ $(OBJ_DIR)/lal_lj_coul.o $(OBJ_DIR)/lal_lj_coul_ext.o \ $(OBJ_DIR)/lal_lj_coul_long.o $(OBJ_DIR)/lal_lj_coul_long_ext.o \ $(OBJ_DIR)/lal_lj_dsf.o $(OBJ_DIR)/lal_lj_dsf_ext.o \ $(OBJ_DIR)/lal_lj_class2_long.o $(OBJ_DIR)/lal_lj_class2_long_ext.o \ $(OBJ_DIR)/lal_coul_long.o $(OBJ_DIR)/lal_coul_long_ext.o \ $(OBJ_DIR)/lal_morse.o $(OBJ_DIR)/lal_morse_ext.o \ $(OBJ_DIR)/lal_charmm_long.o $(OBJ_DIR)/lal_charmm_long_ext.o \ $(OBJ_DIR)/lal_cg_cmm.o $(OBJ_DIR)/lal_cg_cmm_ext.o \ $(OBJ_DIR)/lal_cg_cmm_long.o $(OBJ_DIR)/lal_cg_cmm_long_ext.o \ $(OBJ_DIR)/lal_eam.o $(OBJ_DIR)/lal_eam_ext.o \ + $(OBJ_DIR)/lal_eam_fs_ext.o $(OBJ_DIR)/lal_eam_alloy_ext.o \ $(OBJ_DIR)/lal_buck.o $(OBJ_DIR)/lal_buck_ext.o \ $(OBJ_DIR)/lal_buck_coul.o $(OBJ_DIR)/lal_buck_coul_ext.o \ 
$(OBJ_DIR)/lal_buck_coul_long.o $(OBJ_DIR)/lal_buck_coul_long_ext.o \ $(OBJ_DIR)/lal_table.o $(OBJ_DIR)/lal_table_ext.o \ $(OBJ_DIR)/lal_yukawa.o $(OBJ_DIR)/lal_yukawa_ext.o \ $(OBJ_DIR)/lal_born.o $(OBJ_DIR)/lal_born_ext.o \ $(OBJ_DIR)/lal_born_coul_wolf.o $(OBJ_DIR)/lal_born_coul_wolf_ext.o \ $(OBJ_DIR)/lal_born_coul_long.o $(OBJ_DIR)/lal_born_coul_long_ext.o \ $(OBJ_DIR)/lal_dipole_lj.o $(OBJ_DIR)/lal_dipole_lj_ext.o \ $(OBJ_DIR)/lal_dipole_lj_sf.o $(OBJ_DIR)/lal_dipole_lj_sf_ext.o \ $(OBJ_DIR)/lal_colloid.o $(OBJ_DIR)/lal_colloid_ext.o \ $(OBJ_DIR)/lal_gauss.o $(OBJ_DIR)/lal_gauss_ext.o \ $(OBJ_DIR)/lal_yukawa_colloid.o $(OBJ_DIR)/lal_yukawa_colloid_ext.o \ $(OBJ_DIR)/lal_lj_coul_debye.o $(OBJ_DIR)/lal_lj_coul_debye_ext.o \ $(OBJ_DIR)/lal_coul_dsf.o $(OBJ_DIR)/lal_coul_dsf_ext.o \ $(OBJ_DIR)/lal_sw.o $(OBJ_DIR)/lal_sw_ext.o \ $(OBJ_DIR)/lal_beck.o $(OBJ_DIR)/lal_beck_ext.o \ $(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \ $(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \ $(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \ $(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \ $(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \ $(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \ $(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \ $(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \ $(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \ $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \ $(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \ $(OBJ_DIR)/ellipsoid_nbor_cl.h $(OBJ_DIR)/gayberne_cl.h \ $(OBJ_DIR)/gayberne_lj_cl.h $(OBJ_DIR)/re_squared_cl.h \ $(OBJ_DIR)/re_squared_lj_cl.h $(OBJ_DIR)/lj_cl.h $(OBJ_DIR)/lj96_cl.h \ $(OBJ_DIR)/lj_expand_cl.h $(OBJ_DIR)/lj_coul_cl.h \ $(OBJ_DIR)/lj_coul_long_cl.h $(OBJ_DIR)/lj_dsf_cl.h \ $(OBJ_DIR)/lj_class2_long_cl.h \ $(OBJ_DIR)/coul_long_cl.h $(OBJ_DIR)/morse_cl.h \ $(OBJ_DIR)/charmm_long_cl.h $(OBJ_DIR)/cg_cmm_cl.h \ $(OBJ_DIR)/cg_cmm_long_cl.h 
$(OBJ_DIR)/neighbor_gpu_cl.h \ $(OBJ_DIR)/eam_cl.h $(OBJ_DIR)/buck_cl.h \ $(OBJ_DIR)/buck_coul_cl.h $(OBJ_DIR)/buck_coul_long_cl.h \ $(OBJ_DIR)/table_cl.h $(OBJ_DIR)/yukawa_cl.h \ $(OBJ_DIR)/born_cl.h $(OBJ_DIR)/born_coul_wolf_cl.h \ $(OBJ_DIR)/born_coul_long_cl.h $(OBJ_DIR)/dipole_lj_cl.h \ $(OBJ_DIR)/dipole_lj_sf_cl.h $(OBJ_DIR)/colloid_cl.h \ $(OBJ_DIR)/gauss_cl.h $(OBJ_DIR)/yukawa_colloid_cl.h \ $(OBJ_DIR)/lj_coul_debye_cl.h $(OBJ_DIR)/coul_dsf_cl.h \ $(OBJ_DIR)/sw_cl.h $(OBJ_DIR)/beck_cl.h $(OBJ_DIR)/mie_cl.h \ $(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \ $(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h \ $(OBJ_DIR)/lj_gauss_cl.h $(OBJ_DIR)/dzugutov_cl.h \ $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/coul_cl.h \ $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \ $(OBJ_DIR)/lj_cubic_cl.h OCL_EXECS = $(BIN_DIR)/ocl_get_devices all: $(OBJ_DIR) $(OCL_LIB) $(EXECS) $(OBJ_DIR): mkdir -p $@ $(OBJ_DIR)/atom_cl.h: lal_atom.cu lal_preprocessor.h $(BSH) ./geryon/file_to_cstr.sh atom lal_preprocessor.h lal_atom.cu $(OBJ_DIR)/atom_cl.h $(OBJ_DIR)/lal_atom.o: lal_atom.cpp lal_atom.h $(OCL_H) $(OBJ_DIR)/atom_cl.h $(OCL) -o $@ -c lal_atom.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_answer.o: lal_answer.cpp lal_answer.h $(OCL_H) $(OCL) -o $@ -c lal_answer.cpp -I$(OBJ_DIR) $(OBJ_DIR)/neighbor_cpu_cl.h: lal_neighbor_cpu.cu lal_preprocessor.h $(BSH) ./geryon/file_to_cstr.sh neighbor_cpu lal_preprocessor.h lal_neighbor_cpu.cu $(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/neighbor_gpu_cl.h: lal_neighbor_gpu.cu lal_preprocessor.h $(BSH) ./geryon/file_to_cstr.sh neighbor_gpu lal_preprocessor.h lal_neighbor_gpu.cu $(OBJ_DIR)/neighbor_gpu_cl.h $(OBJ_DIR)/lal_neighbor_shared.o: lal_neighbor_shared.cpp lal_neighbor_shared.h $(OCL_H) $(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/neighbor_gpu_cl.h $(OCL) -o $@ -c lal_neighbor_shared.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_neighbor.o: lal_neighbor.cpp lal_neighbor.h $(OCL_H) lal_neighbor_shared.h $(OCL) -o $@ -c lal_neighbor.cpp -I$(OBJ_DIR) $(OBJ_DIR)/device_cl.h: lal_device.cu 
lal_preprocessor.h $(BSH) ./geryon/file_to_cstr.sh device lal_preprocessor.h lal_device.cu $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/lal_device.o: lal_device.cpp lal_device.h $(ALL_H) $(OBJ_DIR)/device_cl.h $(OCL) -o $@ -c lal_device.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_base_atomic.o: $(OCL_H) lal_base_atomic.h lal_base_atomic.cpp $(OCL) -o $@ -c lal_base_atomic.cpp $(OBJ_DIR)/lal_base_charge.o: $(OCL_H) lal_base_charge.h lal_base_charge.cpp $(OCL) -o $@ -c lal_base_charge.cpp $(OBJ_DIR)/lal_base_ellipsoid.o: $(OCL_H) lal_base_ellipsoid.h lal_base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_cl.h $(OCL) -o $@ -c lal_base_ellipsoid.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_base_dipole.o: $(OCL_H) lal_base_dipole.h lal_base_dipole.cpp $(OCL) -o $@ -c lal_base_dipole.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_base_three.o: $(OCL_H) lal_base_three.h lal_base_three.cpp $(OCL) -o $@ -c lal_base_three.cpp $(OBJ_DIR)/lal_base_dpd.o: $(OCL_H) lal_base_dpd.h lal_base_dpd.cpp $(OCL) -o $@ -c lal_base_dpd.cpp $(OBJ_DIR)/pppm_cl.h: lal_pppm.cu lal_preprocessor.h $(BSH) ./geryon/file_to_cstr.sh pppm lal_preprocessor.h lal_pppm.cu $(OBJ_DIR)/pppm_cl.h; $(OBJ_DIR)/lal_pppm.o: $(ALL_H) lal_pppm.h lal_pppm.cpp $(OBJ_DIR)/pppm_cl.h $(OBJ_DIR)/pppm_cl.h $(OCL) -o $@ -c lal_pppm.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_pppm_ext.o: $(ALL_H) lal_pppm.h lal_pppm_ext.cpp $(OCL) -o $@ -c lal_pppm_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/ellipsoid_nbor_cl.h: lal_ellipsoid_nbor.cu lal_preprocessor.h $(BSH) ./geryon/file_to_cstr.sh ellipsoid_nbor lal_preprocessor.h lal_ellipsoid_nbor.cu $(OBJ_DIR)/ellipsoid_nbor_cl.h $(OBJ_DIR)/gayberne_cl.h: lal_gayberne.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h $(BSH) ./geryon/file_to_cstr.sh gayberne lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h lal_gayberne.cu $(OBJ_DIR)/gayberne_cl.h; $(OBJ_DIR)/gayberne_lj_cl.h: lal_gayberne_lj.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h $(BSH) ./geryon/file_to_cstr.sh gayberne_lj lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h 
lal_gayberne_lj.cu $(OBJ_DIR)/gayberne_lj_cl.h; $(OBJ_DIR)/lal_gayberne.o: $(ALL_H) lal_gayberne.h lal_gayberne.cpp $(OBJ_DIR)/gayberne_cl.h $(OBJ_DIR)/gayberne_lj_cl.h $(OBJ_DIR)/lal_base_ellipsoid.o $(OCL) -o $@ -c lal_gayberne.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/lal_gayberne.o lal_gayberne_ext.cpp $(OCL) -o $@ -c lal_gayberne_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/re_squared_cl.h: lal_re_squared.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h $(BSH) ./geryon/file_to_cstr.sh re_squared lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h lal_re_squared.cu $(OBJ_DIR)/re_squared_cl.h; $(OBJ_DIR)/re_squared_lj_cl.h: lal_re_squared_lj.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h $(BSH) ./geryon/file_to_cstr.sh re_squared_lj lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h lal_re_squared_lj.cu $(OBJ_DIR)/re_squared_lj_cl.h; $(OBJ_DIR)/lal_re_squared.o: $(ALL_H) lal_re_squared.h lal_re_squared.cpp $(OBJ_DIR)/re_squared_cl.h $(OBJ_DIR)/re_squared_lj_cl.h $(OBJ_DIR)/lal_base_ellipsoid.o $(OCL) -o $@ -c lal_re_squared.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/lal_re_squared.o lal_re_squared_ext.cpp $(OCL) -o $@ -c lal_re_squared_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_cl.h: lal_lj.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh lj $(PRE1_H) lal_lj.cu $(OBJ_DIR)/lj_cl.h; $(OBJ_DIR)/lal_lj.o: $(ALL_H) lal_lj.h lal_lj.cpp $(OBJ_DIR)/lj_cl.h $(OBJ_DIR)/lj_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_lj.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_ext.o: $(ALL_H) lal_lj.h lal_lj_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_lj_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_coul_cl.h: lal_lj_coul.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh lj_coul $(PRE1_H) lal_lj_coul.cu $(OBJ_DIR)/lj_coul_cl.h; $(OBJ_DIR)/lal_lj_coul.o: $(ALL_H) lal_lj_coul.h lal_lj_coul.cpp $(OBJ_DIR)/lj_coul_cl.h $(OBJ_DIR)/lj_coul_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_lj_coul.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_coul_ext.o: 
$(ALL_H) lal_lj_coul.h lal_lj_coul_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_lj_coul_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_coul_long_cl.h: lal_lj_coul_long.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh lj_coul_long $(PRE1_H) lal_lj_coul_long.cu $(OBJ_DIR)/lj_coul_long_cl.h; $(OBJ_DIR)/lal_lj_coul_long.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long.cpp $(OBJ_DIR)/lj_coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_lj_coul_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_coul_long_ext.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_lj_coul_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_dsf_cl.h: lal_lj_dsf.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh lj_dsf $(PRE1_H) lal_lj_dsf.cu $(OBJ_DIR)/lj_dsf_cl.h; $(OBJ_DIR)/lal_lj_dsf.o: $(ALL_H) lal_lj_dsf.h lal_lj_dsf.cpp $(OBJ_DIR)/lj_dsf_cl.h $(OBJ_DIR)/lj_dsf_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_lj_dsf.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_dsf_ext.o: $(ALL_H) lal_lj_dsf.h lal_lj_dsf_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_lj_dsf_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_class2_long_cl.h: lal_lj_class2_long.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh lj_class2_long $(PRE1_H) lal_lj_class2_long.cu $(OBJ_DIR)/lj_class2_long_cl.h; $(OBJ_DIR)/lal_lj_class2_long.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_lj_class2_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_class2_long_ext.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_lj_class2_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/coul_long_cl.h: lal_coul_long.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh coul_long $(PRE1_H) lal_coul_long.cu $(OBJ_DIR)/coul_long_cl.h; $(OBJ_DIR)/lal_coul_long.o: $(ALL_H) lal_coul_long.h lal_coul_long.cpp $(OBJ_DIR)/coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_coul_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_coul_long_ext.o: $(ALL_H) lal_coul_long.h 
lal_coul_long_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_coul_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/morse_cl.h: lal_morse.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh morse $(PRE1_H) lal_morse.cu $(OBJ_DIR)/morse_cl.h; $(OBJ_DIR)/lal_morse.o: $(ALL_H) lal_morse.h lal_morse.cpp $(OBJ_DIR)/morse_cl.h $(OBJ_DIR)/morse_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_morse.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_morse_ext.o: $(ALL_H) lal_morse.h lal_morse_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_morse_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/charmm_long_cl.h: lal_charmm_long.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh charmm_long $(PRE1_H) lal_charmm_long.cu $(OBJ_DIR)/charmm_long_cl.h; $(OBJ_DIR)/lal_charmm_long.o: $(ALL_H) lal_charmm_long.h lal_charmm_long.cpp $(OBJ_DIR)/charmm_long_cl.h $(OBJ_DIR)/charmm_long_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_charmm_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_charmm_long_ext.o: $(ALL_H) lal_charmm_long.h lal_charmm_long_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_charmm_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj96_cl.h: lal_lj96.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh lj96 $(PRE1_H) lal_lj96.cu $(OBJ_DIR)/lj96_cl.h; $(OBJ_DIR)/lal_lj96.o: $(ALL_H) lal_lj96.h lal_lj96.cpp $(OBJ_DIR)/lj96_cl.h $(OBJ_DIR)/lj96_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_lj96.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj96_ext.o: $(ALL_H) lal_lj96.h lal_lj96_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_lj96_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_expand_cl.h: lal_lj_expand.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh lj_expand $(PRE1_H) lal_lj_expand.cu $(OBJ_DIR)/lj_expand_cl.h; $(OBJ_DIR)/lal_lj_expand.o: $(ALL_H) lal_lj_expand.h lal_lj_expand.cpp $(OBJ_DIR)/lj_expand_cl.h $(OBJ_DIR)/lj_expand_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_lj_expand.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_expand_ext.o: $(ALL_H) lal_lj_expand.h lal_lj_expand_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_lj_expand_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/cg_cmm_cl.h: 
lal_cg_cmm.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh cg_cmm $(PRE1_H) lal_cg_cmm.cu $(OBJ_DIR)/cg_cmm_cl.h; $(OBJ_DIR)/lal_cg_cmm.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm.cpp $(OBJ_DIR)/cg_cmm_cl.h $(OBJ_DIR)/cg_cmm_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_cg_cmm.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_cg_cmm_ext.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_cg_cmm_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/cg_cmm_long_cl.h: lal_cg_cmm_long.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh cg_cmm_long $(PRE1_H) lal_cg_cmm_long.cu $(OBJ_DIR)/cg_cmm_long_cl.h; $(OBJ_DIR)/lal_cg_cmm_long.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long.cpp $(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_cg_cmm_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_cg_cmm_long_ext.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_cg_cmm_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/eam_cl.h: lal_eam.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh eam $(PRE1_H) lal_eam.cu $(OBJ_DIR)/eam_cl.h; $(OBJ_DIR)/lal_eam.o: $(ALL_H) lal_eam.h lal_eam.cpp $(OBJ_DIR)/eam_cl.h $(OBJ_DIR)/eam_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_eam.cpp -I$(OBJ_DIR) -$(OBJ_DIR)/lal_eam_ext.o: $(ALL_H) lal_eam.h lal_eam_ext.cpp lal_base_charge.h +$(OBJ_DIR)/lal_eam_ext.o: $(ALL_H) lal_eam.h lal_eam_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_eam_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/lal_eam_fs_ext.o: $(ALL_H) lal_eam.h lal_eam_fs_ext.cpp lal_base_atomic.h + $(OCL) -o $@ -c lal_eam_fs_ext.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lal_eam_alloy_ext.o: $(ALL_H) lal_eam.h lal_eam_alloy_ext.cpp lal_base_atomic.h + $(OCL) -o $@ -c lal_eam_alloy_ext.cpp -I$(OBJ_DIR) + $(OBJ_DIR)/buck_cl.h: lal_buck.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh buck $(PRE1_H) lal_buck.cu $(OBJ_DIR)/buck_cl.h; $(OBJ_DIR)/lal_buck.o: $(ALL_H) lal_buck.h lal_buck.cpp $(OBJ_DIR)/buck_cl.h $(OBJ_DIR)/buck_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c 
lal_buck.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_buck_ext.o: $(ALL_H) lal_buck.h lal_buck_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_buck_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/buck_coul_cl.h: lal_buck_coul.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh buck_coul $(PRE1_H) lal_buck_coul.cu $(OBJ_DIR)/buck_coul_cl.h; $(OBJ_DIR)/lal_buck_coul.o: $(ALL_H) lal_buck_coul.h lal_buck_coul.cpp $(OBJ_DIR)/buck_coul_cl.h $(OBJ_DIR)/buck_coul_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_buck_coul.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_buck_coul_ext.o: $(ALL_H) lal_buck_coul.h lal_buck_coul_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_buck_coul_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/buck_coul_long_cl.h: lal_buck_coul_long.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh buck_coul_long $(PRE1_H) lal_buck_coul_long.cu $(OBJ_DIR)/buck_coul_long_cl.h; $(OBJ_DIR)/lal_buck_coul_long.o: $(ALL_H) lal_buck_coul_long.h lal_buck_coul_long.cpp $(OBJ_DIR)/buck_coul_long_cl.h $(OBJ_DIR)/buck_coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_buck_coul_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_buck_coul_long_ext.o: $(ALL_H) lal_buck_coul_long.h lal_buck_coul_long_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_buck_coul_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/table_cl.h: lal_table.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh table $(PRE1_H) lal_table.cu $(OBJ_DIR)/table_cl.h; $(OBJ_DIR)/lal_table.o: $(ALL_H) lal_table.h lal_table.cpp $(OBJ_DIR)/table_cl.h $(OBJ_DIR)/table_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_table.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_table_ext.o: $(ALL_H) lal_table.h lal_table_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_table_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/yukawa_cl.h: lal_yukawa.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh yukawa $(PRE1_H) lal_yukawa.cu $(OBJ_DIR)/yukawa_cl.h; $(OBJ_DIR)/lal_yukawa.o: $(ALL_H) lal_yukawa.h lal_yukawa.cpp $(OBJ_DIR)/yukawa_cl.h $(OBJ_DIR)/yukawa_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_yukawa.cpp -I$(OBJ_DIR) 
$(OBJ_DIR)/lal_yukawa_ext.o: $(ALL_H) lal_yukawa.h lal_yukawa_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_yukawa_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/born_cl.h: lal_born.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh born $(PRE1_H) lal_born.cu $(OBJ_DIR)/born_cl.h; $(OBJ_DIR)/lal_born.o: $(ALL_H) lal_born.h lal_born.cpp $(OBJ_DIR)/born_cl.h $(OBJ_DIR)/born_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_born.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_born_ext.o: $(ALL_H) lal_born.h lal_born_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_born_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/born_coul_wolf_cl.h: lal_born_coul_wolf.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh born_coul_wolf $(PRE1_H) lal_born_coul_wolf.cu $(OBJ_DIR)/born_coul_wolf_cl.h; $(OBJ_DIR)/lal_born_coul_wolf.o: $(ALL_H) lal_born_coul_wolf.h lal_born_coul_wolf.cpp $(OBJ_DIR)/born_coul_wolf_cl.h $(OBJ_DIR)/born_coul_wolf_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_born_coul_wolf.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_born_coul_wolf_ext.o: $(ALL_H) lal_born_coul_wolf.h lal_born_coul_wolf_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_born_coul_wolf_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/born_coul_long_cl.h: lal_born_coul_long.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh born_coul_long $(PRE1_H) lal_born_coul_long.cu $(OBJ_DIR)/born_coul_long_cl.h; $(OBJ_DIR)/lal_born_coul_long.o: $(ALL_H) lal_born_coul_long.h lal_born_coul_long.cpp $(OBJ_DIR)/born_coul_long_cl.h $(OBJ_DIR)/born_coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_born_coul_long.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_born_coul_long_ext.o: $(ALL_H) lal_born_coul_long.h lal_born_coul_long_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_born_coul_long_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/dipole_lj_cl.h: lal_dipole_lj.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh dipole_lj $(PRE1_H) lal_dipole_lj.cu $(OBJ_DIR)/dipole_lj_cl.h; $(OBJ_DIR)/lal_dipole_lj.o: $(ALL_H) lal_dipole_lj.h lal_dipole_lj.cpp $(OBJ_DIR)/dipole_lj_cl.h $(OBJ_DIR)/dipole_lj_cl.h 
$(OBJ_DIR)/lal_base_dipole.o $(OCL) -o $@ -c lal_dipole_lj.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_dipole_lj_ext.o: $(ALL_H) lal_dipole_lj.h lal_dipole_lj_ext.cpp lal_base_dipole.h $(OCL) -o $@ -c lal_dipole_lj_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/dipole_lj_sf_cl.h: lal_dipole_lj_sf.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh dipole_lj_sf $(PRE1_H) lal_dipole_lj_sf.cu $(OBJ_DIR)/dipole_lj_sf_cl.h; $(OBJ_DIR)/lal_dipole_lj_sf.o: $(ALL_H) lal_dipole_lj_sf.h lal_dipole_lj_sf.cpp $(OBJ_DIR)/dipole_lj_sf_cl.h $(OBJ_DIR)/dipole_lj_sf_cl.h $(OBJ_DIR)/lal_base_dipole.o $(OCL) -o $@ -c lal_dipole_lj_sf.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_dipole_lj_sf_ext.o: $(ALL_H) lal_dipole_lj_sf.h lal_dipole_lj_sf_ext.cpp lal_base_dipole.h $(OCL) -o $@ -c lal_dipole_lj_sf_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/colloid_cl.h: lal_colloid.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh colloid $(PRE1_H) lal_colloid.cu $(OBJ_DIR)/colloid_cl.h; $(OBJ_DIR)/lal_colloid.o: $(ALL_H) lal_colloid.h lal_colloid.cpp $(OBJ_DIR)/colloid_cl.h $(OBJ_DIR)/colloid_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_colloid.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_colloid_ext.o: $(ALL_H) lal_colloid.h lal_colloid_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_colloid_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/gauss_cl.h: lal_gauss.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh gauss $(PRE1_H) lal_gauss.cu $(OBJ_DIR)/gauss_cl.h; $(OBJ_DIR)/lal_gauss.o: $(ALL_H) lal_gauss.h lal_gauss.cpp $(OBJ_DIR)/gauss_cl.h $(OBJ_DIR)/gauss_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_gauss.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_gauss_ext.o: $(ALL_H) lal_gauss.h lal_gauss_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_gauss_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/yukawa_colloid_cl.h: lal_yukawa_colloid.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh yukawa_colloid $(PRE1_H) lal_yukawa_colloid.cu $(OBJ_DIR)/yukawa_colloid_cl.h; $(OBJ_DIR)/lal_yukawa_colloid.o: $(ALL_H) lal_yukawa_colloid.h lal_yukawa_colloid.cpp $(OBJ_DIR)/yukawa_colloid_cl.h $(OBJ_DIR)/yukawa_colloid_cl.h 
$(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_yukawa_colloid.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_yukawa_colloid_ext.o: $(ALL_H) lal_yukawa_colloid.h lal_yukawa_colloid_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_yukawa_colloid_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_coul_debye_cl.h: lal_lj_coul_debye.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh lj_coul_debye $(PRE1_H) lal_lj_coul_debye.cu $(OBJ_DIR)/lj_coul_debye_cl.h; $(OBJ_DIR)/lal_lj_coul_debye.o: $(ALL_H) lal_lj_coul_debye.h lal_lj_coul_debye.cpp $(OBJ_DIR)/lj_coul_debye_cl.h $(OBJ_DIR)/lj_coul_debye_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_lj_coul_debye.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_coul_debye_ext.o: $(ALL_H) lal_lj_coul_debye.h lal_lj_coul_debye_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_lj_coul_debye_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/coul_dsf_cl.h: lal_coul_dsf.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh coul_dsf $(PRE1_H) lal_coul_dsf.cu $(OBJ_DIR)/coul_dsf_cl.h; $(OBJ_DIR)/lal_coul_dsf.o: $(ALL_H) lal_coul_dsf.h lal_coul_dsf.cpp $(OBJ_DIR)/coul_dsf_cl.h $(OBJ_DIR)/coul_dsf_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_coul_dsf.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_coul_dsf_ext.o: $(ALL_H) lal_coul_dsf.h lal_coul_dsf_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_coul_dsf_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/sw_cl.h: lal_sw.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh sw $(PRE1_H) lal_sw.cu $(OBJ_DIR)/sw_cl.h; $(OBJ_DIR)/lal_sw.o: $(ALL_H) lal_sw.h lal_sw.cpp $(OBJ_DIR)/sw_cl.h $(OBJ_DIR)/sw_cl.h $(OBJ_DIR)/lal_base_three.o $(OCL) -o $@ -c lal_sw.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_sw_ext.o: $(ALL_H) lal_sw.h lal_sw_ext.cpp lal_base_three.h $(OCL) -o $@ -c lal_sw_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/beck_cl.h: lal_beck.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh beck $(PRE1_H) lal_beck.cu $(OBJ_DIR)/beck_cl.h; $(OBJ_DIR)/lal_beck.o: $(ALL_H) lal_beck.h lal_beck.cpp $(OBJ_DIR)/beck_cl.h $(OBJ_DIR)/beck_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_beck.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_beck_ext.o: 
$(ALL_H) lal_beck.h lal_beck_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_beck_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/mie_cl.h: lal_mie.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh mie $(PRE1_H) lal_mie.cu $(OBJ_DIR)/mie_cl.h; $(OBJ_DIR)/lal_mie.o: $(ALL_H) lal_mie.h lal_mie.cpp $(OBJ_DIR)/mie_cl.h $(OBJ_DIR)/mie_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_mie.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_mie_ext.o: $(ALL_H) lal_mie.h lal_mie_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_mie_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/soft_cl.h: lal_soft.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh soft $(PRE1_H) lal_soft.cu $(OBJ_DIR)/soft_cl.h; $(OBJ_DIR)/lal_soft.o: $(ALL_H) lal_soft.h lal_soft.cpp $(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_soft.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_soft_ext.o: $(ALL_H) lal_soft.h lal_soft_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_soft_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_coul_msm_cl.h: lal_lj_coul_msm.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh lj_coul_msm $(PRE1_H) lal_lj_coul_msm.cu $(OBJ_DIR)/lj_coul_msm_cl.h; $(OBJ_DIR)/lal_lj_coul_msm.o: $(ALL_H) lal_lj_coul_msm.h lal_lj_coul_msm.cpp $(OBJ_DIR)/lj_coul_msm_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_lj_coul_msm.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_coul_msm_ext.o: $(ALL_H) lal_lj_coul_msm.h lal_lj_coul_msm_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_lj_coul_msm_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_gromacs_cl.h: lal_lj_gromacs.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh lj_gromacs $(PRE1_H) lal_lj_gromacs.cu $(OBJ_DIR)/lj_gromacs_cl.h; $(OBJ_DIR)/lal_lj_gromacs.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs.cpp $(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_lj_gromacs.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_gromacs_ext.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_lj_gromacs_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/dpd_cl.h: 
lal_dpd.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh dpd $(PRE1_H) lal_dpd.cu $(OBJ_DIR)/dpd_cl.h; $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cl.h $(OBJ_DIR)/dpd_cl.h $(OBJ_DIR)/lal_base_dpd.o $(OCL) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h $(OCL) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/tersoff_cl.h: lal_tersoff.cu lal_tersoff_extra.h $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh tersoff $(PRE1_H) lal_tersoff_extra.h lal_tersoff.cu $(OBJ_DIR)/tersoff_cl.h; $(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/lal_base_three.o $(OCL) -o $@ -c lal_tersoff.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h $(OCL) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/coul_cl.h: lal_coul.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh coul $(PRE1_H) lal_coul.cu $(OBJ_DIR)/coul_cl.h; $(OBJ_DIR)/lal_coul.o: $(ALL_H) lal_coul.h lal_coul.cpp $(OBJ_DIR)/coul_cl.h $(OBJ_DIR)/coul_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_coul.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_coul_ext.o: $(ALL_H) lal_coul.h lal_coul_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_coul_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/coul_debye_cl.h: lal_coul_debye.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh coul_debye $(PRE1_H) lal_coul_debye.cu $(OBJ_DIR)/coul_debye_cl.h; $(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/lal_base_charge.o $(OCL) -o $@ -c lal_coul_debye.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/zbl_cl.h: lal_zbl.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh zbl $(PRE1_H) lal_zbl.cu $(OBJ_DIR)/zbl_cl.h; $(OBJ_DIR)/lal_zbl.o: $(ALL_H) lal_zbl.h lal_zbl.cpp 
$(OBJ_DIR)/zbl_cl.h $(OBJ_DIR)/zbl_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_zbl.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_zbl_ext.o: $(ALL_H) lal_zbl.h lal_zbl_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_zbl_ext.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lj_cubic_cl.h: lal_lj_cubic.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh lj_cubic $(PRE1_H) lal_lj_cubic.cu $(OBJ_DIR)/lj_cubic_cl.h; $(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/lal_base_atomic.o $(OCL) -o $@ -c lal_lj_cubic.cpp -I$(OBJ_DIR) $(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h $(OCL) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR) $(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp $(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK) $(OCL_LIB): $(OBJS) $(PTXS) $(AR) -crusv $(OCL_LIB) $(OBJS) @cp $(EXTRAMAKE) Makefile.lammps opencl: $(OCL_EXECS) clean: rm -rf $(EXECS) $(OCL_EXECS) $(OCL_LIB) $(OBJS) $(KERS) *.linkinfo veryclean: clean rm -rf *~ *.linkinfo diff --git a/lib/gpu/lal_eam_alloy_ext.cpp b/lib/gpu/lal_eam_alloy_ext.cpp new file mode 100644 index 000000000..282f93afe --- /dev/null +++ b/lib/gpu/lal_eam_alloy_ext.cpp @@ -0,0 +1,143 @@ +/*************************************************************************** + eam_alloy_ext.cpp + ------------------- + Trung Dac Nguyen, W. Michael Brown (ORNL) + + Functions for LAMMPS access to eam/alloy acceleration routines.
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : brownw@ornl.gov nguyentd@ornl.gov + ***************************************************************************/ + +#include <iostream> +#include <cassert> +#include <math.h> + +#include "lal_eam.h" + +using namespace std; +using namespace LAMMPS_AL; + +static EAM<PRECISION,ACC_PRECISION> EAMALMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq, + int **host_type2rhor, int **host_type2z2r, int *host_type2frho, + double ***host_rhor_spline, double ***host_z2r_spline, + double ***host_frho_spline, + double rdr, double rdrho, double rhomax, int nrhor, + int nrho, int nz2r, int nfrho, int nr, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + int &gpu_mode, FILE *screen, int &fp_size) { + EAMALMF.clear(); + gpu_mode=EAMALMF.device->gpu_mode(); + double gpu_split=EAMALMF.device->particle_split(); + int first_gpu=EAMALMF.device->first_device(); + int last_gpu=EAMALMF.device->last_device(); + int world_me=EAMALMF.device->world_me(); + int gpu_rank=EAMALMF.device->gpu_rank(); + int procs_per_gpu=EAMALMF.device->procs_per_gpu(); + + // disable host/device split for now + if (gpu_split != 1.0) + return -8; + + fp_size=sizeof(PRECISION); + + EAMALMF.device->init_message(screen,"eam/alloy",first_gpu,last_gpu); + + bool message=false; + if (EAMALMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if 
(world_me==0) + init_ok=EAMALMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r, + host_type2frho, host_rhor_spline, host_z2r_spline, + host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, nz2r, + nfrho, nr, nlocal, nall, 300, maxspecial, cell_size, + gpu_split, screen); + + EAMALMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; i<procs_per_gpu; i++) { + if (message) { + if (last_gpu-first_gpu==0) + fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i); + else + fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu, + last_gpu,i); + fflush(screen); + } + if (gpu_rank==i && world_me!=0) + init_ok=EAMALMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r, + host_type2frho, host_rhor_spline, host_z2r_spline, + host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, + nz2r, nfrho, nr, nlocal, nall, 300, maxspecial, + cell_size, gpu_split, screen); + + EAMALMF.device->gpu_barrier(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + EAMALMF.estimate_gpu_overhead(); + return init_ok; +} + +void eam_alloy_gpu_clear() { + EAMALMF.clear(); +} + +int ** eam_alloy_gpu_compute_n(const int ago, const int inum_full, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, int &inum, void **fp_ptr) { + return EAMALMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, tag, nspecial, special, eflag, vflag, eatom, + vatom, host_start, ilist, jnum, cpu_time, success, + inum, fp_ptr); +} + +void eam_alloy_gpu_compute(const int ago, const int inum_full, const int nlocal, + const int nall, double **host_x, int *host_type, + int *ilist, int *numj, int **firstneigh, const bool eflag, + const bool vflag, const 
bool eatom, const bool vatom, + int &host_start, const double cpu_time, bool &success, + void **fp_ptr) { + EAMALMF.compute(ago,inum_full,nlocal,nall,host_x,host_type,ilist,numj, + firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success, + fp_ptr); +} + +void eam_alloy_gpu_compute_force(int *ilist, const bool eflag, const bool vflag, + const bool eatom, const bool vatom) { + EAMALMF.compute2(ilist, eflag, vflag, eatom, vatom); +} + + +double eam_alloy_gpu_bytes() { + return EAMALMF.host_memory_usage(); +} + + diff --git a/lib/gpu/lal_eam_fs_ext.cpp b/lib/gpu/lal_eam_fs_ext.cpp new file mode 100644 index 000000000..4992f3ab9 --- /dev/null +++ b/lib/gpu/lal_eam_fs_ext.cpp @@ -0,0 +1,143 @@ +/*************************************************************************** + eam_fs_ext.cpp + ------------------- + Trung Dac Nguyen, W. Michael Brown (ORNL) + + Functions for LAMMPS access to buck acceleration routines. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : brownw@ornl.gov nguyentd@ornl.gov + ***************************************************************************/ + +#include <iostream> +#include <cassert> +#include <math.h> + +#include "lal_eam.h" + +using namespace std; +using namespace LAMMPS_AL; + +static EAM<PRECISION,ACC_PRECISION> EAMFSMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int eam_fs_gpu_init(const int ntypes, double host_cutforcesq, + int **host_type2rhor, int **host_type2z2r, int *host_type2frho, + double ***host_rhor_spline, double ***host_z2r_spline, + double ***host_frho_spline, + double rdr, double rdrho, double rhomax, int nrhor, + int nrho, int nz2r, int 
nfrho, int nr, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + int &gpu_mode, FILE *screen, int &fp_size) { + EAMFSMF.clear(); + gpu_mode=EAMFSMF.device->gpu_mode(); + double gpu_split=EAMFSMF.device->particle_split(); + int first_gpu=EAMFSMF.device->first_device(); + int last_gpu=EAMFSMF.device->last_device(); + int world_me=EAMFSMF.device->world_me(); + int gpu_rank=EAMFSMF.device->gpu_rank(); + int procs_per_gpu=EAMFSMF.device->procs_per_gpu(); + + // disable host/device split for now + if (gpu_split != 1.0) + return -8; + + fp_size=sizeof(PRECISION); + + EAMFSMF.device->init_message(screen,"eam/fs",first_gpu,last_gpu); + + bool message=false; + if (EAMFSMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=EAMFSMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r, + host_type2frho, host_rhor_spline, host_z2r_spline, + host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, nz2r, + nfrho, nr, nlocal, nall, 300, maxspecial, cell_size, + gpu_split, screen); + + EAMFSMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; i<procs_per_gpu; i++) { + if (message) { + if (last_gpu-first_gpu==0) + fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i); + else + fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu, + last_gpu,i); + fflush(screen); + } + if (gpu_rank==i && world_me!=0) + init_ok=EAMFSMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r, + host_type2frho, host_rhor_spline, host_z2r_spline, + host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, + nz2r, nfrho, nr, nlocal, nall, 300, maxspecial, + cell_size, gpu_split, screen); + + EAMFSMF.device->gpu_barrier(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + 
EAMFSMF.estimate_gpu_overhead(); + return init_ok; +} + +void eam_fs_gpu_clear() { + EAMFSMF.clear(); +} + +int ** eam_fs_gpu_compute_n(const int ago, const int inum_full, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, int &inum, void **fp_ptr) { + return EAMFSMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, tag, nspecial, special, eflag, vflag, eatom, + vatom, host_start, ilist, jnum, cpu_time, success, + inum, fp_ptr); +} + +void eam_fs_gpu_compute(const int ago, const int inum_full, const int nlocal, + const int nall, double **host_x, int *host_type, + int *ilist, int *numj, int **firstneigh, const bool eflag, + const bool vflag, const bool eatom, const bool vatom, + int &host_start, const double cpu_time, bool &success, + void **fp_ptr) { + EAMFSMF.compute(ago,inum_full,nlocal,nall,host_x,host_type,ilist,numj, + firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success, + fp_ptr); +} + +void eam_fs_gpu_compute_force(int *ilist, const bool eflag, const bool vflag, + const bool eatom, const bool vatom) { + EAMFSMF.compute2(ilist, eflag, vflag, eatom, vatom); +} + + +double eam_fs_gpu_bytes() { + return EAMFSMF.host_memory_usage(); +} + + diff --git a/src/GPU/pair_eam_alloy_gpu.cpp b/src/GPU/pair_eam_alloy_gpu.cpp index 74b43f110..9fb914cb0 100644 --- a/src/GPU/pair_eam_alloy_gpu.cpp +++ b/src/GPU/pair_eam_alloy_gpu.cpp @@ -1,324 +1,563 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Trung Dac Nguyen (ORNL), W. Michael Brown (ORNL) ------------------------------------------------------------------------- */ #include "stdio.h" #include "stdlib.h" #include "string.h" #include "pair_eam_alloy_gpu.h" #include "atom.h" +#include "force.h" #include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" #include "memory.h" #include "error.h" +#include "neigh_request.h" +#include "gpu_extra.h" using namespace LAMMPS_NS; #define MAXLINE 1024 +// External functions from cuda library for atom decomposition + +int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq, + int **host_type2rhor, int **host_type2z2r, + int *host_type2frho, double ***host_rhor_spline, + double ***host_z2r_spline, double ***host_frho_spline, + double rdr, double rdrho, double rhomax, + int nrhor, int nrho, int nz2r, int nfrho, int nr, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen, int &fp_size); +void eam_alloy_gpu_clear(); +int** eam_alloy_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, tagint *tag, int **nspecial, tagint **special, + const bool eflag, const bool vflag, const bool eatom, + const bool vatom, int &host_start, int **ilist, + int **jnum, const double cpu_time, bool &success, + int &inum, void **fp_ptr); +void eam_alloy_gpu_compute(const int ago, const int inum_full, const int nlocal, + const int nall,double **host_x, int *host_type, + int *ilist, int *numj, int **firstneigh, + const bool eflag, const bool vflag, + const bool eatom, const bool 
vatom, int &host_start, + const double cpu_time, bool &success, void **fp_ptr); +void eam_alloy_gpu_compute_force(int *ilist, const bool eflag, const bool vflag, + const bool eatom, const bool vatom); +double eam_alloy_gpu_bytes(); + +/* ---------------------------------------------------------------------- */ + +PairEAMAlloyGPU::PairEAMAlloyGPU(LAMMPS *lmp) : PairEAM(lmp), gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + reinitflag = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- */ + +PairEAMAlloyGPU::~PairEAMAlloyGPU() +{ + eam_alloy_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +double PairEAMAlloyGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + eam_alloy_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairEAMAlloyGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + // compute density on each atom on GPU + + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + int inum, host_start, inum_dev; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = eam_alloy_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, inum_dev, &fp_pinned); + } else { // gpu_mode == GPU_FORCE + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + eam_alloy_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success, &fp_pinned); + } + + if 
(!success) + error->one(FLERR,"Insufficient memory on accelerator"); + + // communicate derivative of embedding function + + comm->forward_comm_pair(this); + + // compute forces on each atom on GPU + if (gpu_mode != GPU_FORCE) + eam_alloy_gpu_compute_force(NULL, eflag, vflag, eflag_atom, vflag_atom); + else + eam_alloy_gpu_compute_force(ilist, eflag, vflag, eflag_atom, vflag_atom); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairEAMAlloyGPU::init_style() +{ + if (force->newton_pair) + error->all(FLERR,"Cannot use newton pair with eam/alloy/gpu pair style"); + + // convert read-in file(s) to arrays and spline them + + file2array(); + array2spline(); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int fp_size; + int success = eam_alloy_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r, + type2frho, rhor_spline, z2r_spline, frho_spline, + rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, + atom->nlocal, atom->nlocal+atom->nghost, 300, + maxspecial, cell_size, gpu_mode, screen, fp_size); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } + + if (fp_size == sizeof(double)) + fp_single = false; + else + fp_single = true; +} + +/* 
---------------------------------------------------------------------- */
+
+// Pair energy (returned) and force factor (written to fforce) for atoms i,j
+// of types itype,jtype at squared distance rsq. factor_coul and factor_lj
+// are accepted but not read. Reads fp for both atoms from fp_pinned.
+double PairEAMAlloyGPU::single(int i, int j, int itype, int jtype,
+                               double rsq, double factor_coul, double factor_lj,
+                               double &fforce)
+{
+  int m;
+  double r,p,rhoip,rhojp,z2,z2p,recip,phi,phip,psip;
+  double *coeff;
+
+  // locate the spline interval m and the fractional offset p within it,
+  // clamping both so the lookup stays inside the table
+  r = sqrt(rsq);
+  p = r*rdr + 1.0;
+  m = static_cast<int> (p);
+  m = MIN(m,nr-1);
+  p -= m;
+  p = MIN(p,1.0);
+
+  // coeff[0..2] evaluate the derivative, coeff[3..6] the value
+  coeff = rhor_spline[type2rhor[itype][jtype]][m];
+  rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
+  coeff = rhor_spline[type2rhor[jtype][itype]][m];
+  rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
+  coeff = z2r_spline[type2z2r[itype][jtype]][m];
+  z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
+  z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+
+  // fp_pinned holds doubles or floats depending on fp_single
+  double fp_i,fp_j;
+  if (fp_single == false) {
+    fp_i = ((double*)fp_pinned)[i];
+    fp_j = ((double*)fp_pinned)[j];
+  } else {
+    fp_i = ((float*)fp_pinned)[i];
+    fp_j = ((float*)fp_pinned)[j];
+  }
+
+  recip = 1.0/r;
+  phi = z2*recip;
+  phip = z2p*recip - phi*recip;
+  psip = fp_i*rhojp + fp_j*rhoip + phip;
+  fforce = -psip*recip;
+
+  return phi;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Copy fp for the n atoms named in list into buf as doubles, widening from
+// float when fp_single is set. pbc_flag and pbc are not read. Returns the
+// number of values written.
+int PairEAMAlloyGPU::pack_forward_comm(int n, int *list, double *buf,
+                                       int pbc_flag,int *pbc)
+{
+  int i,j,m;
+
+  m = 0;
+
+  if (fp_single) {
+    float *fp_ptr = (float *)fp_pinned;
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = static_cast<double>(fp_ptr[j]);
+    }
+  } else {
+    double *fp_ptr = (double *)fp_pinned;
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = fp_ptr[j];
+    }
+  }
+
+  return m;
+}
+
/* ---------------------------------------------------------------------- */

-PairEAMAlloyGPU::PairEAMAlloyGPU(LAMMPS *lmp) : PairEAMGPU(lmp)
+// Store n values from buf into fp_pinned[first .. first+n-1], narrowing to
+// float when fp_single is set.
+void PairEAMAlloyGPU::unpack_forward_comm(int n, int first, double *buf)
{
-  one_coeff = 1;
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  if (fp_single) {
+    float *fp_ptr = (float *)fp_pinned;
+    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
+  } else {
+    double *fp_ptr = (double *)fp_pinned;
+    for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
+  }
}

/* ----------------------------------------------------------------------
   set coeffs for one or more type pairs
   read DYNAMO setfl file
------------------------------------------------------------------------- */

// Expected arguments: "* *", the potential file name, then one element name
// (or NULL) per atom type. Errors out on a wrong count, non-wildcard I,J,
// an unknown element, or when no type pair ends up mapped.
void PairEAMAlloyGPU::coeff(int narg, char **arg)
{
  int i,j;

  if (!allocated) allocate();

  if (narg != 3 + atom->ntypes)
    error->all(FLERR,"Incorrect args for pair coefficients");

  // insure I,J args are * *

  if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
    error->all(FLERR,"Incorrect args for pair coefficients");

  // read EAM setfl file

  // free any tables left from an earlier call before reading again
  if (setfl) {
    for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
    delete [] setfl->elements;
    delete [] setfl->mass;
    memory->destroy(setfl->frho);
    memory->destroy(setfl->rhor);
    memory->destroy(setfl->z2r);
    delete setfl;
  }
  setfl = new Setfl();
  read_file(arg[2]);

  // read args that map atom types to elements in potential file
  // map[i] = which element the Ith atom type is, -1 if NULL

  for (i = 3; i < narg; i++) {
    if (strcmp(arg[i],"NULL") == 0) {
      map[i-2] = -1;
      continue;
    }
    for (j = 0; j < setfl->nelements; j++)
      if (strcmp(arg[i],setfl->elements[j]) == 0) break;
    if (j < setfl->nelements) map[i-2] = j;
    else error->all(FLERR,"No matching element in EAM potential file");
  }

  // clear setflag since coeff() called once with I,J = * *

  int n = atom->ntypes;
  for (i = 1; i <= n; i++)
    for (j = i; j <= n; j++)
      setflag[i][j] = 0;

  // set setflag i,j for type pairs where both are mapped to elements
  // set mass of atom type if i = j

  int count = 0;
  for (i = 1; i <= n; i++) {
    for (j = i; j <= n; j++) {
      if (map[i] >= 0 && map[j] >= 0) {
        setflag[i][j] = 1;
        if (i == j) atom->set_mass(i,setfl->mass[map[i]]);
        count++;
      }
    }
  }

  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
}

/* ----------------------------------------------------------------------
   read a multi-element DYNAMO setfl file
------------------------------------------------------------------------- */ void PairEAMAlloyGPU::read_file(char *filename) { Setfl *file = setfl; // open potential file int me = comm->me; FILE *fptr; char line[MAXLINE]; if (me == 0) { fptr = fopen(filename,"r"); if (fptr == NULL) { char str[128]; sprintf(str,"Cannot open EAM potential file %s",filename); error->one(FLERR,str); } } // read and broadcast header // extract element names from nelements line int n; if (me == 0) { fgets(line,MAXLINE,fptr); fgets(line,MAXLINE,fptr); fgets(line,MAXLINE,fptr); fgets(line,MAXLINE,fptr); n = strlen(line) + 1; } MPI_Bcast(&n,1,MPI_INT,0,world); MPI_Bcast(line,n,MPI_CHAR,0,world); sscanf(line,"%d",&file->nelements); int nwords = atom->count_words(line); if (nwords != file->nelements + 1) error->all(FLERR,"Incorrect element names in EAM potential file"); char **words = new char*[file->nelements+1]; nwords = 0; strtok(line," \t\n\r\f"); while ( (words[nwords++] = strtok(NULL," \t\n\r\f")) ) continue; file->elements = new char*[file->nelements]; for (int i = 0; i < file->nelements; i++) { n = strlen(words[i]) + 1; file->elements[i] = new char[n]; strcpy(file->elements[i],words[i]); } delete [] words; if (me == 0) { fgets(line,MAXLINE,fptr); sscanf(line,"%d %lg %d %lg %lg", &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); } MPI_Bcast(&file->nrho,1,MPI_INT,0,world); MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); MPI_Bcast(&file->nr,1,MPI_INT,0,world); MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); file->mass = new double[file->nelements]; memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho"); memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor"); memory->create(file->z2r,file->nelements,file->nelements,file->nr+1, "pair:z2r"); int i,j,tmp; for (i = 0; i < file->nelements; i++) { if (me == 0) { fgets(line,MAXLINE,fptr); sscanf(line,"%d %lg",&tmp,&file->mass[i]); } MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); 
if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]); MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world); } for (i = 0; i < file->nelements; i++) for (j = 0; j <= i; j++) { if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); } // close the potential file if (me == 0) fclose(fptr); } /* ---------------------------------------------------------------------- copy read-in setfl potential to standard array format ------------------------------------------------------------------------- */ void PairEAMAlloyGPU::file2array() { int i,j,m,n; int ntypes = atom->ntypes; // set function params directly from setfl file nrho = setfl->nrho; nr = setfl->nr; drho = setfl->drho; dr = setfl->dr; rhomax = (nrho-1) * drho; // ------------------------------------------------------------------ // setup frho arrays // ------------------------------------------------------------------ // allocate frho arrays // nfrho = # of setfl elements + 1 for zero array nfrho = setfl->nelements + 1; memory->destroy(frho); memory->create(frho,nfrho,nrho+1,"pair:frho"); // copy each element's frho to global frho for (i = 0; i < setfl->nelements; i++) for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m]; // add extra frho of zeroes for non-EAM types to point to (pair hybrid) // this is necessary b/c fp is still computed for non-EAM atoms for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to // if atom type doesn't point to element (non-EAM atom in pair hybrid) // then map it to last frho array of zeroes for (i = 1; i <= ntypes; i++) if (map[i] >= 0) type2frho[i] = map[i]; else type2frho[i] = nfrho-1; // ------------------------------------------------------------------ // setup rhor arrays // 
------------------------------------------------------------------ // allocate rhor arrays // nrhor = # of setfl elements nrhor = setfl->nelements; memory->destroy(rhor); memory->create(rhor,nrhor,nr+1,"pair:rhor"); // copy each element's rhor to global rhor for (i = 0; i < setfl->nelements; i++) for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m]; // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to // for setfl files, I,J mapping only depends on I // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used for (i = 1; i <= ntypes; i++) for (j = 1; j <= ntypes; j++) type2rhor[i][j] = map[i]; // ------------------------------------------------------------------ // setup z2r arrays // ------------------------------------------------------------------ // allocate z2r arrays // nz2r = N*(N+1)/2 where N = # of setfl elements nz2r = setfl->nelements * (setfl->nelements+1) / 2; memory->destroy(z2r); memory->create(z2r,nz2r,nr+1,"pair:z2r"); // copy each element pair z2r to global z2r, only for I >= J n = 0; for (i = 0; i < setfl->nelements; i++) for (j = 0; j <= i; j++) { for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m]; n++; } // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to // set of z2r arrays only fill lower triangular Nelement matrix // value = n = sum over rows of lower-triangular matrix until reach irow,icol // swap indices when irow < icol to stay lower triangular // if map = -1 (non-EAM atom in pair hybrid): // type2z2r is not used by non-opt // but set type2z2r to 0 since accessed by opt int irow,icol; for (i = 1; i <= ntypes; i++) { for (j = 1; j <= ntypes; j++) { irow = map[i]; icol = map[j]; if (irow == -1 || icol == -1) { type2z2r[i][j] = 0; continue; } if (irow < icol) { irow = map[j]; icol = map[i]; } n = 0; for (m = 0; m < irow; m++) n += m + 1; n += icol; type2z2r[i][j] = n; } } } diff --git a/src/GPU/pair_eam_alloy_gpu.h b/src/GPU/pair_eam_alloy_gpu.h index 9513cab83..e19fd0c26 
100644
--- a/src/GPU/pair_eam_alloy_gpu.h
+++ b/src/GPU/pair_eam_alloy_gpu.h
@@ -1,63 +1,77 @@
/* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifdef PAIR_CLASS

PairStyle(eam/alloy/gpu,PairEAMAlloyGPU)

#else

#ifndef LMP_PAIR_EAM_ALLOY_GPU_H
#define LMP_PAIR_EAM_ALLOY_GPU_H

-#include "pair_eam_gpu.h"
+#include "pair_eam.h"

namespace LAMMPS_NS {

-class PairEAMAlloyGPU : public PairEAMGPU {
+// GPU-library-backed eam/alloy pair style. Derives directly from PairEAM and
+// declares its own compute/init/comm methods instead of inheriting them from
+// PairEAMGPU.
+class PairEAMAlloyGPU : public PairEAM {
 public:
  PairEAMAlloyGPU(class LAMMPS *);
-  virtual ~PairEAMAlloyGPU() {}
+  virtual ~PairEAMAlloyGPU();
  void coeff(int, char **);
+  void compute(int, int);
+  void init_style();
+  double single(int, int, int, int, double, double, double, double &);
+  double memory_usage();
+
+  // forward communication of the per-atom fp values
+  int pack_forward_comm(int, int *, double *, int, int *);
+  void unpack_forward_comm(int, int, double *);
+
+  // values gpu_mode is compared against
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };

 protected:
  void read_file(char *);
  void file2array();
+
+  int gpu_mode;      // one of the enum values above
+  double cpu_time;   // passed to the library compute calls
+  void *fp_pinned;   // per-atom fp buffer handed back by the library
+  bool fp_single;    // true when fp_pinned holds float rather than double
};

}

#endif
#endif

/* ERROR/WARNING messages:

E: Incorrect args for pair coefficients

Self-explanatory.  Check the input script or data file.

E: No matching element in EAM potential file

The EAM potential file does not contain elements that match the
requested elements.

E: Cannot open EAM potential file %s

The specified EAM potential file cannot be opened.  Check that the
path and name are correct.

E: Incorrect element names in EAM potential file

The element names in the EAM file do not match those requested.

*/
diff --git a/src/GPU/pair_eam_fs_gpu.cpp b/src/GPU/pair_eam_fs_gpu.cpp
index a6e80547e..3c5c486a6 100644
--- a/src/GPU/pair_eam_fs_gpu.cpp
+++ b/src/GPU/pair_eam_fs_gpu.cpp
@@ -1,333 +1,572 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Trung Dac Nguyen (ORNL), W.
Michael Brown (ORNL) ------------------------------------------------------------------------- */ #include "stdio.h" #include "stdlib.h" #include "string.h" #include "pair_eam_fs_gpu.h" #include "atom.h" +#include "force.h" #include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" #include "memory.h" #include "error.h" +#include "neigh_request.h" +#include "gpu_extra.h" using namespace LAMMPS_NS; #define MAXLINE 1024 +// External functions from cuda library for atom decomposition + +int eam_fs_gpu_init(const int ntypes, double host_cutforcesq, + int **host_type2rhor, int **host_type2z2r, + int *host_type2frho, double ***host_rhor_spline, + double ***host_z2r_spline, double ***host_frho_spline, + double rdr, double rdrho, double rhomax, + int nrhor, int nrho, int nz2r, int nfrho, int nr, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen, int &fp_size); +void eam_fs_gpu_clear(); +int** eam_fs_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, tagint *tag, int **nspecial, tagint **special, + const bool eflag, const bool vflag, const bool eatom, + const bool vatom, int &host_start, int **ilist, + int **jnum, const double cpu_time, bool &success, + int &inum, void **fp_ptr); +void eam_fs_gpu_compute(const int ago, const int inum_full, const int nlocal, + const int nall,double **host_x, int *host_type, + int *ilist, int *numj, int **firstneigh, + const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, void **fp_ptr); +void eam_fs_gpu_compute_force(int *ilist, const bool eflag, const bool vflag, + const bool eatom, const bool vatom); +double eam_fs_gpu_bytes(); + +/* ---------------------------------------------------------------------- */ + +PairEAMFSGPU::PairEAMFSGPU(LAMMPS *lmp) : PairEAM(lmp), gpu_mode(GPU_FORCE) +{ + 
respa_enable = 0; + reinitflag = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- */ + +PairEAMFSGPU::~PairEAMFSGPU() +{ + eam_fs_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +double PairEAMFSGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + eam_fs_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairEAMFSGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + // compute density on each atom on GPU + + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + int inum, host_start, inum_dev; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = eam_fs_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, inum_dev, &fp_pinned); + } else { // gpu_mode == GPU_FORCE + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + eam_fs_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success, &fp_pinned); + } + + if (!success) + error->one(FLERR,"Insufficient memory on accelerator"); + + // communicate derivative of embedding function + + comm->forward_comm_pair(this); + + // compute forces on each atom on GPU + if (gpu_mode != GPU_FORCE) + eam_fs_gpu_compute_force(NULL, eflag, vflag, eflag_atom, vflag_atom); + else + eam_fs_gpu_compute_force(ilist, eflag, vflag, eflag_atom, vflag_atom); +} + +/* 
---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairEAMFSGPU::init_style() +{ + if (force->newton_pair) + error->all(FLERR,"Cannot use newton pair with eam/fs/gpu pair style"); + + // convert read-in file(s) to arrays and spline them + + file2array(); + array2spline(); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int fp_size; + int success = eam_fs_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r, + type2frho, rhor_spline, z2r_spline, frho_spline, + rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, + atom->nlocal, atom->nlocal+atom->nghost, 300, + maxspecial, cell_size, gpu_mode, screen, fp_size); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } + + if (fp_size == sizeof(double)) + fp_single = false; + else + fp_single = true; +} + +/* ---------------------------------------------------------------------- */ + +double PairEAMFSGPU::single(int i, int j, int itype, int jtype, + double rsq, double factor_coul, double factor_lj, + double &fforce) +{ + int m; + double r,p,rhoip,rhojp,z2,z2p,recip,phi,phip,psip; + double *coeff; + + r = sqrt(rsq); + p = r*rdr + 1.0; + m = static_cast<int> (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + 
coeff = rhor_spline[type2rhor[itype][jtype]][m]; + rhoip = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = rhor_spline[type2rhor[jtype][itype]][m]; + rhojp = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = z2r_spline[type2z2r[itype][jtype]][m]; + z2p = (coeff[0]*p + coeff[1])*p + coeff[2]; + z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + + double fp_i,fp_j; + if (fp_single == false) { + fp_i = ((double*)fp_pinned)[i]; + fp_j = ((double*)fp_pinned)[j]; + } else { + fp_i = ((float*)fp_pinned)[i]; + fp_j = ((float*)fp_pinned)[j]; + } + + recip = 1.0/r; + phi = z2*recip; + phip = z2p*recip - phi*recip; + psip = fp_i*rhojp + fp_j*rhoip + phip; + fforce = -psip*recip; + + return phi; +} + +/* ---------------------------------------------------------------------- */ + +int PairEAMFSGPU::pack_forward_comm(int n, int *list, double *buf, + int pbc_flag,int *pbc) +{ + int i,j,m; + + m = 0; + + if (fp_single) { + float *fp_ptr = (float *)fp_pinned; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = static_cast<double>(fp_ptr[j]); + } + } else { + double *fp_ptr = (double *)fp_pinned; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = fp_ptr[j]; + } + } + + return m; +} + /* ---------------------------------------------------------------------- */ -PairEAMFSGPU::PairEAMFSGPU(LAMMPS *lmp) : PairEAMGPU(lmp) +void PairEAMFSGPU::unpack_forward_comm(int n, int first, double *buf) { - one_coeff = 1; + int i,m,last; + + m = 0; + last = first + n; + if (fp_single) { + float *fp_ptr = (float *)fp_pinned; + for (i = first; i < last; i++) fp_ptr[i] = buf[m++]; + } else { + double *fp_ptr = (double *)fp_pinned; + for (i = first; i < last; i++) fp_ptr[i] = buf[m++]; + } } /* ---------------------------------------------------------------------- set coeffs for one or more type pairs read EAM Finnis-Sinclair file ------------------------------------------------------------------------- */ void PairEAMFSGPU::coeff(int narg, char **arg) { int i,j; if (!allocated) 
allocate(); if (narg != 3 + atom->ntypes) error->all(FLERR,"Incorrect args for pair coefficients"); // insure I,J args are * * if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) error->all(FLERR,"Incorrect args for pair coefficients"); // read EAM Finnis-Sinclair file if (fs) { for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i]; delete [] fs->elements; delete [] fs->mass; memory->destroy(fs->frho); memory->destroy(fs->rhor); memory->destroy(fs->z2r); delete fs; } fs = new Fs(); read_file(arg[2]); // read args that map atom types to elements in potential file // map[i] = which element the Ith atom type is, -1 if NULL for (i = 3; i < narg; i++) { if (strcmp(arg[i],"NULL") == 0) { map[i-2] = -1; continue; } for (j = 0; j < fs->nelements; j++) if (strcmp(arg[i],fs->elements[j]) == 0) break; if (j < fs->nelements) map[i-2] = j; else error->all(FLERR,"No matching element in EAM potential file"); } // clear setflag since coeff() called once with I,J = * * int n = atom->ntypes; for (i = 1; i <= n; i++) for (j = i; j <= n; j++) setflag[i][j] = 0; // set setflag i,j for type pairs where both are mapped to elements // set mass of atom type if i = j int count = 0; for (i = 1; i <= n; i++) { for (j = i; j <= n; j++) { if (map[i] >= 0 && map[j] >= 0) { setflag[i][j] = 1; if (i == j) atom->set_mass(i,fs->mass[map[i]]); count++; } } } if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); } /* ---------------------------------------------------------------------- read a multi-element DYNAMO setfl file ------------------------------------------------------------------------- */ void PairEAMFSGPU::read_file(char *filename) { Fs *file = fs; // open potential file int me = comm->me; FILE *fptr; char line[MAXLINE]; if (me == 0) { - fptr = fopen(filename,"r"); + fptr = force->open_potential(filename); if (fptr == NULL) { char str[128]; sprintf(str,"Cannot open EAM potential file %s",filename); error->one(FLERR,str); } } // read and broadcast header // 
extract element names from nelements line int n; if (me == 0) { fgets(line,MAXLINE,fptr); fgets(line,MAXLINE,fptr); fgets(line,MAXLINE,fptr); fgets(line,MAXLINE,fptr); n = strlen(line) + 1; } MPI_Bcast(&n,1,MPI_INT,0,world); MPI_Bcast(line,n,MPI_CHAR,0,world); sscanf(line,"%d",&file->nelements); int nwords = atom->count_words(line); if (nwords != file->nelements + 1) error->all(FLERR,"Incorrect element names in EAM potential file"); char **words = new char*[file->nelements+1]; nwords = 0; strtok(line," \t\n\r\f"); - while ( (words[nwords++] = strtok(NULL," \t\n\r\f")) ) continue; + while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; file->elements = new char*[file->nelements]; for (int i = 0; i < file->nelements; i++) { n = strlen(words[i]) + 1; file->elements[i] = new char[n]; strcpy(file->elements[i],words[i]); } delete [] words; if (me == 0) { fgets(line,MAXLINE,fptr); sscanf(line,"%d %lg %d %lg %lg", &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); } MPI_Bcast(&file->nrho,1,MPI_INT,0,world); MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); MPI_Bcast(&file->nr,1,MPI_INT,0,world); MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); file->mass = new double[file->nelements]; memory->create(file->frho,file->nelements,file->nrho+1, "pair:frho"); memory->create(file->rhor,file->nelements,file->nelements, file->nr+1,"pair:rhor"); memory->create(file->z2r,file->nelements,file->nelements, file->nr+1,"pair:z2r"); int i,j,tmp; for (i = 0; i < file->nelements; i++) { if (me == 0) { fgets(line,MAXLINE,fptr); sscanf(line,"%d %lg",&tmp,&file->mass[i]); } MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); for (j = 0; j < file->nelements; j++) { if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]); MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world); } } for (i = 0; i < file->nelements; i++) for (j = 0; j <= i; 
j++) { if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); } // close the potential file if (me == 0) fclose(fptr); } /* ---------------------------------------------------------------------- copy read-in setfl potential to standard array format ------------------------------------------------------------------------- */ void PairEAMFSGPU::file2array() { int i,j,m,n; int ntypes = atom->ntypes; // set function params directly from fs file nrho = fs->nrho; nr = fs->nr; drho = fs->drho; dr = fs->dr; rhomax = (nrho-1) * drho; // ------------------------------------------------------------------ // setup frho arrays // ------------------------------------------------------------------ // allocate frho arrays // nfrho = # of fs elements + 1 for zero array nfrho = fs->nelements + 1; memory->destroy(frho); memory->create(frho,nfrho,nrho+1,"pair:frho"); // copy each element's frho to global frho for (i = 0; i < fs->nelements; i++) for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m]; // add extra frho of zeroes for non-EAM types to point to (pair hybrid) // this is necessary b/c fp is still computed for non-EAM atoms for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to // if atom type doesn't point to element (non-EAM atom in pair hybrid) // then map it to last frho array of zeroes for (i = 1; i <= ntypes; i++) if (map[i] >= 0) type2frho[i] = map[i]; else type2frho[i] = nfrho-1; // ------------------------------------------------------------------ // setup rhor arrays // ------------------------------------------------------------------ // allocate rhor arrays // nrhor = square of # of fs elements nrhor = fs->nelements * fs->nelements; memory->destroy(rhor); memory->create(rhor,nrhor,nr+1,"pair:rhor"); // copy each element pair rhor to global rhor n = 0; for (i = 0; i < fs->nelements; i++) for (j = 0; j < fs->nelements; j++) { for (m = 1; 
m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m]; n++; } // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to // for fs files, there is a full NxN set of rhor arrays // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used for (i = 1; i <= ntypes; i++) for (j = 1; j <= ntypes; j++) type2rhor[i][j] = map[i] * fs->nelements + map[j]; // ------------------------------------------------------------------ // setup z2r arrays // ------------------------------------------------------------------ // allocate z2r arrays // nz2r = N*(N+1)/2 where N = # of fs elements nz2r = fs->nelements * (fs->nelements+1) / 2; memory->destroy(z2r); memory->create(z2r,nz2r,nr+1,"pair:z2r"); // copy each element pair z2r to global z2r, only for I >= J n = 0; for (i = 0; i < fs->nelements; i++) for (j = 0; j <= i; j++) { for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m]; n++; } // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to // set of z2r arrays only fill lower triangular Nelement matrix // value = n = sum over rows of lower-triangular matrix until reach irow,icol // swap indices when irow < icol to stay lower triangular // if map = -1 (non-EAM atom in pair hybrid): // type2z2r is not used by non-opt // but set type2z2r to 0 since accessed by opt int irow,icol; for (i = 1; i <= ntypes; i++) { for (j = 1; j <= ntypes; j++) { irow = map[i]; icol = map[j]; if (irow == -1 || icol == -1) { type2z2r[i][j] = 0; continue; } if (irow < icol) { irow = map[j]; icol = map[i]; } n = 0; for (m = 0; m < irow; m++) n += m + 1; n += icol; type2z2r[i][j] = n; } } } diff --git a/src/GPU/pair_eam_fs_gpu.h b/src/GPU/pair_eam_fs_gpu.h index d61d398a8..9ef436c3f 100644 --- a/src/GPU/pair_eam_fs_gpu.h +++ b/src/GPU/pair_eam_fs_gpu.h @@ -1,63 +1,77 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve 
Plimpton, sjplimp@sandia.gov

    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.

    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */

 #ifdef PAIR_CLASS

 PairStyle(eam/fs/gpu,PairEAMFSGPU)

 #else

 #ifndef LMP_PAIR_EAM_FS_GPU_H
 #define LMP_PAIR_EAM_FS_GPU_H

-#include "pair_eam_gpu.h"
+#include "pair_eam.h"

 namespace LAMMPS_NS {

-class PairEAMFSGPU : public PairEAMGPU {
+class PairEAMFSGPU : public PairEAM {
  public:
   PairEAMFSGPU(class LAMMPS *);
-  virtual ~PairEAMFSGPU() {}
+  virtual ~PairEAMFSGPU();   // calls eam_fs_gpu_clear()
   void coeff(int, char **);
+  void compute(int, int);    // density pass, forward comm, force pass
+  void init_style();         // calls eam_fs_gpu_init(); sets gpu_mode, fp_single
+  double single(int, int, int, int, double, double, double, double &);
+  double memory_usage();     // Pair::memory_usage() + eam_fs_gpu_bytes()
+
+  int pack_forward_comm(int, int *, double *, int, int *);   // fp_pinned -> buf
+  void unpack_forward_comm(int, int, double *);              // buf -> fp_pinned
+
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };   // values held by gpu_mode

  protected:
   void read_file(char *);
   void file2array();
+
+  int gpu_mode;      // starts as GPU_FORCE; passed by reference to eam_fs_gpu_init()
+  double cpu_time;   // set to 0.0 in the constructor; passed to the compute calls
+  void *fp_pinned;   // per-atom fp buffer set by the compute calls; float or double
+  bool fp_single;    // true when fp_pinned must be read as float (see init_style)
 };

 }

 #endif
 #endif

 /* ERROR/WARNING messages:

+E: Insufficient memory on accelerator
+
+Raised by compute() when the GPU library call for the density pass
+reports failure through its success flag.
+
+E: Cannot use newton pair with eam/fs/gpu pair style
+
+Self-explanatory.
+
 E: Incorrect args for pair coefficients

 Self-explanatory.  Check the input script or data file.

 E: No matching element in EAM potential file

 The EAM potential file does not contain elements that match the
 requested elements.

 E: Cannot open EAM potential file %s

 The specified EAM potential file cannot be opened.  Check that the
 path and name are correct.

 E: Incorrect element names in EAM potential file

 The element names in the EAM file do not match those requested.

 */