diff --git a/Benchmarks/ConfigFiles/90Pot.par b/Benchmarks/ConfigFiles/90Pot.par index df1aad4..1261aca 100644 --- a/Benchmarks/ConfigFiles/90Pot.par +++ b/Benchmarks/ConfigFiles/90Pot.par @@ -1,946 +1,946 @@ #The user defines which mode of lenstool he wants to run here runmode source 0 sources.cat image 1 ../ConfigFiles/219Img.txt #inverse 0 grid 1 22 1.5 poten 1 1000 1.5 mass 1 1000 0.4 1.5 dpl 1 1000 1.5 amplif 1 1000 1.5 - nbgridcells 1000 + nbgridcells 5000 Debug # true activates debug mode end frame #dmax 45 #either dmax or x and y have to be declared when a grid is used xmin -50.000 xmax 50.000 ymin -50.000 ymax 50.000 end cosmology model 1 H0 70.000 omegaM 0.300 omegaX 0.700 omegaK 0.000 wA 0.000 wX -1.000 end potentiel 1 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 0.000 #X Position [arcsec] y_centre 0.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 800. #Dispersion Velocity [km/s] rcut 500 #Cut radius (PIEMD distribution) rcore 5 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 2 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 25.000 #X Position [arcsec] y_centre 20.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 140. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 4 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre -16.000 #X Position [arcsec] y_centre 2.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 130. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 5 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre -10.000 #X Position [arcsec] y_centre 0.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 110. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 6 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 3.000 #X Position [arcsec] y_centre -10.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 80. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 120. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end potentiel 7 profil 8 #Profile: 5 SIS, 8 PIEMD x_centre 17.000 #X Position [arcsec] y_centre -7.000 #Y Position [arcsec] ellipticity 0.11 #Ellipticity v_disp 1. #Dispersion Velocity [km/s] rcut 150 #Cut radius (PIEMD distribution) rcore 1 #Core radius (PIEMD distribution) z_lens 0.4 #Redshift of lens end limit 1 x_centre 1 -1.0 1.0 0.01000 y_centre 1 -1.0 1.0 0.010000 ellipticity 1 0.05 0.75 0.01 angle_pos 0 0. 180.0 0.1 rcore 0 0.1 4. 0.100000 rcut 0 1.0 20.0 0.10000 v_disp 0 200.0 1000.0 0.10000 end cline nplan 1 1.5 dmax 50 limitLow 0.2 limitHigh 10 nbgridcells 1000 end finish diff --git a/Benchmarks/GridGradientBenchmark/cudafunctions.cu b/Benchmarks/GridGradientBenchmark/cudafunctions.cu new file mode 100644 index 0000000..9539941 --- /dev/null +++ b/Benchmarks/GridGradientBenchmark/cudafunctions.cu @@ -0,0 +1,39 @@ +/** +* @file cudafunctions.cpp +* @Author Christoph Schaefer, EPFL (christophernstrerne.schaefer@epfl.ch) +* @date July 2015 +* @version 0,1 +* @brief Functions for safe calling of cuda function and timing functions +* +*/ + + + + + + +/// Include +///========================================================================================================== +#include<stdio.h> +#include<string> +#include<strings.h> +#include<stdlib.h> +#include<iostream> +#include<fstream> +#include<math.h> +#include<time.h> +#include"cudafunctions.cuh" +#include <cuda_runtime.h> +#include<typeinfo> + + + +/** @brief Safe Cuda call function. Checks for error, prints and then stops the execution. +*/ +void cudasafe( cudaError_t error, std::string message) +{ + if(error!=cudaSuccess) { + fprintf(stderr,"ERROR: %s : %s \n",message.c_str(),cudaGetErrorString(error)); + exit(-1); + } +} diff --git a/Benchmarks/GridGradientBenchmark/cudafunctions.cuh b/Benchmarks/GridGradientBenchmark/cudafunctions.cuh new file mode 100644 index 0000000..5208385 --- /dev/null +++ b/Benchmarks/GridGradientBenchmark/cudafunctions.cuh @@ -0,0 +1,33 @@ +/** +* @file cudafunctions.cuh +* @Author Christoph Schaefer, EPFL (christophernstrerne.schaefer@epfl.ch) +* @date July 2015 +* @version 0,1 +* @brief Header file for cudafunctions.cu +* +*/ + + + + + + + +//Header guard +#ifndef CUDAFUNCTIONS_CUH +#define CUDAFUNCTIONS_CUH + + + + +// Include +//=========================================================================================================== +#include <strings.h> +#include "structure.h" +#include <iostream> + +void cudasafe( cudaError_t error, std::string message); + + +//End header guard +#endif diff --git a/Benchmarks/GridGradientBenchmark/gradient_GPU.cu b/Benchmarks/GridGradientBenchmark/gradient_GPU.cu new file mode 100644 index 0000000..c26d7af --- /dev/null +++ b/Benchmarks/GridGradientBenchmark/gradient_GPU.cu @@ -0,0 +1,350 @@ + +#include "gradient_GPU.cuh" + +#include <iostream> +#include <string.h> +#include <cuda_runtime.h> +#include <math.h> +#include <sys/time.h> +#include <fstream> +#include <immintrin.h> +#include <map> +/* +#ifdef __AVX__ +#include "simd_math_avx.h" +#endif +#ifdef __AVX512F__ +#include "simd_math_avx512f.h" +#endif +*/ +#include "structure_hpc.h" +#include "utils.hpp" + + + +// +// SOA versions, vectorizable +// +__device__ struct point module_potentialDerivatives_totalGradient_5_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) +{ + //asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA begins"); + // + struct point grad, clumpgrad; + grad.x = 0; + grad.y = 0; + for(int i = shalos; i < shalos + nhalos; i++) + { + // + struct point true_coord, true_coord_rotation; + // + true_coord.x = pImage->x - lens->position_x[i]; + true_coord.y = pImage->y - lens->position_y[i]; + // + true_coord_rotation = rotateCoordinateSystem_GPU(true_coord, lens->ellipticity_angle[i]); + double R = sqrt(true_coord_rotation.x*true_coord_rotation.x*(1 - lens->ellipticity_potential[i])+true_coord_rotation.y*true_coord_rotation.y*(1 + lens->ellipticity_potential[i])); + // + grad.x += (1 - lens->ellipticity[i]/3.)*lens->b0[i]*true_coord_rotation.x/R; + grad.y += (1 + lens->ellipticity[i]/3.)*lens->b0[i]*true_coord_rotation.y/R; + } + return grad; +} + +// +// +// +__device__ struct point module_potentialDerivatives_totalGradient_8_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) +{ + //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins"); + // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 + // + struct point grad, clumpgrad; + grad.x = 0; + grad.y = 0; + // + for(int i = shalos; i < shalos + nhalos; i++) + { + //IACA_START; + // + struct point true_coord, true_coord_rot; //, result; + //double R, angular_deviation; + complex zis; + // + //result.x = result.y = 0.; + // + true_coord.x = pImage->x - lens->position_x[i]; + true_coord.y = pImage->y - lens->position_y[i]; + double cosi,sinu; + cosi = cos(lens->ellipticity_angle[i]); + sinu = sin(lens->ellipticity_angle[i]); + //positionning at the potential center + // Change the origin of the coordinate system to the center of the clump + //true_coord_rot = rotateCoordinateSystem_GPU(true_coord, lens->ellipticity_angle[i]); + true_coord_rot = rotateCoordinateSystem_GPU_2(true_coord, cosi,sinu); + // + double x = true_coord_rot.x; + double y = true_coord_rot.y; + double eps = lens->ellipticity_potential[i]; + double rc = lens->rcore[i]; + // + //std::cout << "piemd_lderivatives" << std::endl; + // + double sqe = sqrt(eps); + // + double cx1 = (1. - eps)/(1. + eps); + double cxro = (1. + eps)*(1. + eps); + double cyro = (1. - eps)*(1. - eps); + // + double rem2 = x*x/cxro + y*y/cyro; + // + complex zci, znum, zden, zres; + double norm; + // + zci.re = 0; + zci.im = -0.5*(1. - eps*eps)/sqe; + // + znum.re = cx1*x; + znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; + // + zden.re = x; + zden.im = 2.*rc*sqe - y; + norm = (zden.re*zden.re + zden.im*zden.im); // zis = znum/zden + // + zis.re = (znum.re*zden.re + znum.im*zden.im)/norm; + zis.im = (znum.im*zden.re - znum.re*zden.im)/norm; + norm = zis.re; + zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) + zis.im = atan2(zis.im, norm); + // norm = zis.re; + zres.re = zci.re*zis.re - zci.im*zis.im; // Re( zci*ln(zis) ) + zres.im = zci.im*zis.re + zis.im*zci.re; // Im( zci*ln(zis) ) + // + zis.re = zres.re; + zis.im = zres.im; + // + //zres.re = zis.re*b0; + //zres.im = zis.im*b0; + // rotation + clumpgrad.x = zis.re; + clumpgrad.y = zis.im; + clumpgrad = rotateCoordinateSystem_GPU_2(clumpgrad, cosi,-sinu); + // + clumpgrad.x = lens->b0[i]*clumpgrad.x; + clumpgrad.y = lens->b0[i]*clumpgrad.y; + // + //clumpgrad.x = lens->b0[i]*zis.re; + //clumpgrad.y = lens->b0[i]*zis.im; + //nan check + //if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y) + //{ + // add the gradients + grad.x += clumpgrad.x; + grad.y += clumpgrad.y; + //} + } + //IACA_END; + // + return(grad); +} + + +__device__ struct point module_potentialDerivatives_totalGradient_81_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) +{ + //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins"); + //std::cout << "# module_potentialDerivatives_totalGradient_SOA begins" << std::endl; + // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 + // + struct point grad, clumpgrad; + grad.x = 0; + grad.y = 0; + // + for(int i = shalos; i < shalos + nhalos; i++) + { + //IACA_START; + // + struct point true_coord, true_coord_rot; //, result; + //double R, angular_deviation; + complex zis; + // + //result.x = result.y = 0.; + // + true_coord.x = pImage->x - lens->position_x[i]; + true_coord.y = pImage->y - lens->position_y[i]; + //positionning at the potential center + // Change the origin of the coordinate system to the center of the clump + true_coord_rot = rotateCoordinateSystem_GPU(true_coord, lens->ellipticity_angle[i]); + // + double x = true_coord_rot.x; + double y = true_coord_rot.y; + double eps = lens->ellipticity_potential[i]; + double rc = lens->rcore[i]; + double rcut = lens->rcut[i]; + double b0 = lens->b0[i]; + double t05 = b0*rcut/(rcut - rc); + //printf("b0 = %f, rcut = %f, rc = %f, t05 = %f\n", b0, rcut, rc, t05); + // + //std::cout << "piemd_lderivatives" << std::endl; + // + double sqe = sqrt(eps); + // + double cx1 = (1. - eps)/(1. + eps); + double cxro = (1. + eps)*(1. + eps); + double cyro = (1. - eps)*(1. - eps); + // + double rem2 = x*x/cxro + y*y/cyro; + // + complex zci, znum, zden, zres_rc, zres_rcut; + double norm; + // + zci.re = 0; + zci.im = -0.5*(1. - eps*eps)/sqe; + // + // step 1 + // + { + znum.re = cx1*x; + znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; + // + zden.re = x; + zden.im = 2.*rc*sqe - y; + norm = (zden.re*zden.re + zden.im*zden.im); // zis = znum/zden + // + zis.re = (znum.re*zden.re + znum.im*zden.im)/norm; + zis.im = (znum.im*zden.re - znum.re*zden.im)/norm; + norm = zis.re; + zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) + zis.im = atan2(zis.im, norm); + // norm = zis.re; + zres_rc.re = zci.re*zis.re - zci.im*zis.im; // Re( zci*ln(zis) ) + zres_rc.im = zci.im*zis.re + zis.im*zci.re; // Im( zci*ln(zis) ) + } + // + // step 2 + // + { + znum.re = cx1*x; + znum.im = 2.*sqe*sqrt(rcut*rcut + rem2) - y/cx1; + // + zden.re = x; + zden.im = 2.*rcut*sqe - y; + norm = (zden.re*zden.re + zden.im*zden.im); // zis = znum/zden + // + zis.re = (znum.re*zden.re + znum.im*zden.im)/norm; + zis.im = (znum.im*zden.re - znum.re*zden.im)/norm; + norm = zis.re; + zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) + zis.im = atan2(zis.im, norm); + // norm = zis.re; + zres_rcut.re = zci.re*zis.re - zci.im*zis.im; // Re( zci*ln(zis) ) + zres_rcut.im = zci.im*zis.re + zis.im*zci.re; // Im( zci*ln(zis) ) + } + zis.re = t05*(zres_rc.re - zres_rcut.re); + zis.im = t05*(zres_rc.im - zres_rcut.im); + //printf("%f %f\n", zis.re, zis.im); + // + //zres.re = zis.re*b0; + //zres.im = zis.im*b0; + // rotation + clumpgrad.x = zis.re; + clumpgrad.y = zis.im; + clumpgrad = rotateCoordinateSystem_GPU(clumpgrad, -lens->ellipticity_angle[i]); + // + //clumpgrad.x = lens->b0[i]*clumpgrad.x; + //clumpgrad.y = lens->b0[i]*clumpgrad.y; + // + //clumpgrad.x = lens->b0[i]*zis.re; + //clumpgrad.y = lens->b0[i]*zis.im; + //nan check + //if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y) + //{ + // add the gradients + grad.x += clumpgrad.x; + grad.y += clumpgrad.y; + //printf("grad = %f %f\n", clumpgrad.x, clumpgrad.y); + //} + } + //IACA_END; + // + return(grad); +} +// +// +// + +typedef struct point (*halo_func_GPU_t) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos); + +__constant__ halo_func_GPU_t halo_func_GPU[100] = +{ +0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_5_SOA_GPU, 0, 0, module_potentialDerivatives_totalGradient_8_SOA_GPU, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, module_potentialDerivatives_totalGradient_81_SOA_GPU, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +// +// +// +__device__ struct point module_potentialDerivatives_totalGradient_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int nhalos) +{ + struct point grad, clumpgrad; + // + grad.x = clumpgrad.x = 0; + grad.y = clumpgrad.y = 0; + // + int shalos = 0; + // + //module_potentialDerivatives_totalGradient_81_SOA(pImage, lens, 0, nhalos); + //return; + /* + int* p_type = &(lens->type)[0]; + int* lens_type = (int*) malloc(nhalos*sizeof(int)); + memcpy(lens_type, &(lens->type)[0], nhalos*sizeof(int)); + + //quicksort(lens_type, nhalos); + //*/ + + //printf ("%f \n",lens->position_x[shalos]); + + + while (shalos < nhalos) + { + + int lens_type = lens->type[shalos]; + int count = 1; + while (lens->type[shalos + count] == lens_type) count++; + //std::cerr << "type = " << lens_type << " " << count << " " << shalos << std::endl; + //printf ("%d %d %d \n",lens_type,count,shalos); + // + clumpgrad = (*halo_func_GPU[lens_type])(pImage, lens, shalos, count); + // + grad.x += clumpgrad.x; + grad.y += clumpgrad.y; + shalos += count; + } + + return(grad); +} + +__device__ inline struct point rotateCoordinateSystem_GPU(struct point P, double theta) +{ + struct point Q; + + Q.x = P.x*cos(theta) + P.y*sin(theta); + Q.y = P.y*cos(theta) - P.x*sin(theta); + + return(Q); +} + +__device__ inline struct point rotateCoordinateSystem_GPU_2(struct point P, double cosi, double sinu) +{ + struct point Q; + + Q.x = P.x*cosi + P.y*sinu; + Q.y = P.y*cosi - P.x*sinu; + + return(Q); +} diff --git a/Benchmarks/GridGradientBenchmark/gradient_GPU.cuh b/Benchmarks/GridGradientBenchmark/gradient_GPU.cuh new file mode 100644 index 0000000..4cb3d20 --- /dev/null +++ b/Benchmarks/GridGradientBenchmark/gradient_GPU.cuh @@ -0,0 +1,20 @@ +/* + * gradient_GPU.cuh + * + * Created on: Feb 1, 2017 + * Author: cerschae + */ + +#ifndef GRADIENT_GPU_CUH_ +#define GRADIENT_GPU_CUH_ + +#include "cudafunctions.cuh" +#include <fstream> +#include <structure_hpc.h> + +__device__ struct point module_potentialDerivatives_totalGradient_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int nhalos); + +__device__ inline struct point rotateCoordinateSystem_GPU(struct point P, double theta); +__device__ inline struct point rotateCoordinateSystem_GPU_2(struct point P, double cosi, double sinu); + +#endif /* GRADIENT_GPU_CUH_ */ diff --git a/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cu b/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cu new file mode 100644 index 0000000..a642baf --- /dev/null +++ b/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cu @@ -0,0 +1,760 @@ +#include <fstream> +#include "grid_gradient_GPU.cuh" + + + +void gradient_grid_GPU_sorted(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos ,int nbgridcells){ + + + int nBlocks_gpu = 0; + // Define the number of threads per block the GPU will use + cudaDeviceProp properties_gpu; + + cudaGetDeviceProperties(&properties_gpu, 0); // Get properties of 0th GPU in use + + if (properties_gpu.maxThreadsDim[0]<threadsPerBlock) + { + fprintf(stderr, "ERROR: The GPU has to support at least %u threads per block.\n", threadsPerBlock); + exit(-1); + } + else + { + nBlocks_gpu = properties_gpu.maxGridSize[0] / threadsPerBlock; // Get the maximum number of blocks with the chosen number of threads + // per Block that the GPU supports + } + + grid_param *frame_gpu; + Potential_SOA *lens_gpu,*lens_kernel ; + int *type_gpu; + double *lens_x_gpu, *lens_y_gpu, *b0_gpu, *angle_gpu, *epot_gpu, *rcore_gpu, *rcut_gpu; + double *grid_grad_x_gpu, *grid_grad_y_gpu ; + + lens_gpu = (Potential_SOA *) malloc(sizeof(Potential_SOA)); + lens_gpu->type = (int *) malloc(sizeof(int)); + + // Allocate variables on the GPU + cudasafe(cudaMalloc( (void**)&(lens_kernel), sizeof(Potential_SOA)),"Gradientgpu.cu : Alloc Potential_SOA: " ); + cudasafe(cudaMalloc( (void**)&(type_gpu), nhalos*sizeof(int)),"Gradientgpu.cu : Alloc type_gpu: " ); + cudasafe(cudaMalloc( (void**)&(lens_x_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc x_gpu: " ); + cudasafe(cudaMalloc( (void**)&(lens_y_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc y_gpu: " ); + cudasafe(cudaMalloc( (void**)&(b0_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc b0_gpu: " ); + cudasafe(cudaMalloc( (void**)&(angle_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc angle_gpu: " ); + cudasafe(cudaMalloc( (void**)&(epot_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc epot_gpu: " ); + cudasafe(cudaMalloc( (void**)&(rcore_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc rcore_gpu: " ); + cudasafe(cudaMalloc( (void**)&(rcut_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc rcut_gpu: " ); + cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradientgpu.cu : Alloc frame_gpu: " ); + cudasafe(cudaMalloc( (void**)&(grid_grad_x_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_x_gpu: " ); + cudasafe(cudaMalloc( (void**)&(grid_grad_y_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_y_gpu: " ); + + // Copy values to the GPU + cudasafe(cudaMemcpy(type_gpu,lens->type , nhalos*sizeof(int),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy type_gpu: " ); + cudasafe(cudaMemcpy(lens_x_gpu,lens->position_x , nhalos*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy x_gpu: " ); + cudasafe(cudaMemcpy(lens_y_gpu,lens->position_y , nhalos*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy y_gpu: " ); + cudasafe(cudaMemcpy(b0_gpu,lens->b0 , nhalos*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy b0_gpu: " ); + cudasafe(cudaMemcpy(angle_gpu,lens->ellipticity_angle , nhalos*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy angle_gpu: " ); + cudasafe(cudaMemcpy(epot_gpu, lens->ellipticity_potential, nhalos*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy epot_gpu: " ); + cudasafe(cudaMemcpy(rcore_gpu, lens->rcore, nhalos*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy rcore_gpu: " ); + cudasafe(cudaMemcpy(rcut_gpu, lens->rcut, nhalos*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy rcut_gpu: " ); + cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy fame_gpu: " ); + + + //printf("%p \n", lens_gpu); + //printf("%p \n", type_gpu); + //printf("%p \n", lens_gpu->type); + //fflush(stdout); + lens_gpu->type = type_gpu; + lens_gpu->position_x = lens_x_gpu; + lens_gpu->position_y = lens_y_gpu; + lens_gpu->b0 = b0_gpu; + lens_gpu->ellipticity_angle = angle_gpu; + lens_gpu->ellipticity_potential = epot_gpu; + lens_gpu->rcore = rcore_gpu; + lens_gpu->rcut = rcut_gpu; + + cudaMemcpy(lens_kernel, lens_gpu, sizeof(Potential_SOA), cudaMemcpyHostToDevice); + + + if (int((nbgridcells) * (nbgridcells)/threadsPerBlock) == 0){ + gradient_grid_kernel<<<1,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,nhalos, nbgridcells, lens_kernel); + } + else{ + gradient_grid_kernel<<<(nbgridcells) * (nbgridcells)/threadsPerBlock,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,nhalos, nbgridcells, lens_kernel); + } + cudasafe(cudaMemcpy( grid_grad_x, grid_grad_x_gpu, (nbgridcells) * (nbgridcells) *sizeof(double),cudaMemcpyDeviceToHost ),"Gradientgpu.cu : Copy source_x_gpu: " ); + cudasafe(cudaMemcpy( grid_grad_y, grid_grad_y_gpu, (nbgridcells) * (nbgridcells) *sizeof(double), cudaMemcpyDeviceToHost),"Gradientgpu.cu : Copy source_y_gpu: " ); + + + //printf("%f %f \n",grid_grad_x[0],grid_grad_y[0]); + + // Free GPU memory + cudaFree(lens_gpu); + cudaFree(type_gpu); + cudaFree(lens_x_gpu); + cudaFree(lens_y_gpu); + cudaFree(b0_gpu); + cudaFree(angle_gpu); + cudaFree(epot_gpu); + cudaFree(rcore_gpu); + cudaFree(rcut_gpu); + cudaFree(grid_grad_x_gpu); + cudaFree(grid_grad_y_gpu); + +/* + for (int i = 0; i < nbgridcells; i++){ + for(int j = 0; j < nbgridcells; j++){ + printf(" %f",grid_grad_x[i*nbgridcells + j]); + } + printf("\n"); + }*/ + +} + + +void gradient_grid_pinned(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int *Nlens,int nbgridcells){ + + + int nBlocks_gpu = 0; + // Define the number of threads per block the GPU will use + cudaDeviceProp properties_gpu; + + cudaGetDeviceProperties(&properties_gpu, 0); // Get properties of 0th GPU in use + + if (properties_gpu.maxThreadsDim[0]<threadsPerBlock) + { + fprintf(stderr, "ERROR: The GPU has to support at least %u threads per block.\n", threadsPerBlock); + exit(-1); + } + else + { + nBlocks_gpu = properties_gpu.maxGridSize[0] / threadsPerBlock; // Get the maximum number of blocks with the chosen number of threads + // per Block that the GPU supports + } + + grid_param *frame_gpu; + double *lens_x_gpu, *lens_y_gpu, *b0_gpu, *angle_gpu, *epot_gpu, *rcore_gpu, *rcut_gpu; + double *grid_grad_x_gpu, *grid_grad_y_gpu ; + + //SIS// + + // Allocate variables on the GPU + cudasafe(cudaMalloc( (void**)&(lens_x_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc x_gpu: " ); + cudasafe(cudaMalloc( (void**)&(lens_y_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc y_gpu: " ); + cudasafe(cudaMalloc( (void**)&(b0_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc b0_gpu: " ); + cudasafe(cudaMalloc( (void**)&(angle_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc angle_gpu: " ); + cudasafe(cudaMalloc( (void**)&(epot_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc epot_gpu: " ); + cudasafe(cudaMalloc( (void**)&(rcore_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc rcore_gpu: " ); + cudasafe(cudaMalloc( (void**)&(rcut_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc rcut_gpu: " ); + cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradientgpu.cu : Alloc frame_gpu: " ); + cudasafe(cudaMalloc( (void**)&(grid_grad_x_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_x_gpu: " ); + cudasafe(cudaMalloc( (void**)&(grid_grad_y_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_y_gpu: " ); + + // Copy values to the GPU + cudasafe(cudaMemcpy(lens_x_gpu,lens[0].position_x , Nlens[0]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy x_gpu: " ); + cudasafe(cudaMemcpy(lens_y_gpu,lens[0].position_y , Nlens[0]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy y_gpu: " ); + cudasafe(cudaMemcpy(b0_gpu,lens[0].b0 , Nlens[0]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy b0_gpu: " ); + cudasafe(cudaMemcpy(angle_gpu,lens[0].ellipticity_angle , Nlens[0]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy angle_gpu: " ); + cudasafe(cudaMemcpy(epot_gpu, lens[0].ellipticity_potential, Nlens[0]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy epot_gpu: " ); + cudasafe(cudaMemcpy(rcore_gpu, lens[0].rcore, Nlens[0]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy rcore_gpu: " ); + cudasafe(cudaMemcpy(rcut_gpu, lens[0].rcut, Nlens[0]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy rcut_gpu: " ); + cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy fame_gpu: " ); + + //printf( "%d %d",nBlocks_gpu,(nbgridcells) * (nbgridcells)/threadsPerBlock ); + + if (int((nbgridcells) * (nbgridcells)/threadsPerBlock) == 0){ + gradient_grid_sis_GPU<<<1,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,Nlens[0], nbgridcells, lens_x_gpu, lens_y_gpu, b0_gpu, angle_gpu, epot_gpu, rcore_gpu, rcut_gpu); + } + else{ + gradient_grid_sis_GPU<<<(nbgridcells) * (nbgridcells)/threadsPerBlock,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,Nlens[0], nbgridcells, lens_x_gpu, lens_y_gpu, b0_gpu, angle_gpu, epot_gpu, rcore_gpu, rcut_gpu); + } + cudasafe(cudaMemcpy( grid_grad_x, grid_grad_x_gpu, (nbgridcells) * (nbgridcells) *sizeof(double),cudaMemcpyDeviceToHost ),"Gradientgpu.cu : Copy source_x_gpu: " ); + cudasafe(cudaMemcpy( grid_grad_y, grid_grad_y_gpu, (nbgridcells) * (nbgridcells) *sizeof(double), cudaMemcpyDeviceToHost),"Gradientgpu.cu : Copy source_y_gpu: " ); + //printf ("%f %f \n", grid_grad_x[0],grid_grad_y[0]); + + // Free GPU memory + cudaFree(lens_x_gpu); + cudaFree(lens_y_gpu); + cudaFree(b0_gpu); + cudaFree(angle_gpu); + cudaFree(epot_gpu); + cudaFree(rcore_gpu); + cudaFree(rcut_gpu); + cudaFree(grid_grad_x_gpu); + cudaFree(grid_grad_y_gpu); + + + //PIEMD8// + + // Allocate variables on the GPU + cudasafe(cudaMalloc( (void**)&(lens_x_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc x_gpu: " ); + cudasafe(cudaMalloc( (void**)&(lens_y_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc y_gpu: " ); + cudasafe(cudaMalloc( (void**)&(b0_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc b0_gpu: " ); + cudasafe(cudaMalloc( (void**)&(angle_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc angle_gpu: " ); + cudasafe(cudaMalloc( (void**)&(epot_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc epot_gpu: " ); + cudasafe(cudaMalloc( (void**)&(rcore_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc rcore_gpu: " ); + cudasafe(cudaMalloc( (void**)&(rcut_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc rcut_gpu: " ); + cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradientgpu.cu : Alloc frame_gpu: " ); + cudasafe(cudaMalloc( (void**)&(grid_grad_x_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_x_gpu: " ); + cudasafe(cudaMalloc( (void**)&(grid_grad_y_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_y_gpu: " ); + + // Copy values to the GPU + cudasafe(cudaMemcpy(lens_x_gpu,lens[1].position_x , Nlens[1]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy x_gpu: " ); + cudasafe(cudaMemcpy(lens_y_gpu,lens[1].position_y , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy y_gpu: " ); + cudasafe(cudaMemcpy(b0_gpu,lens[1].b0 , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy b0_gpu: " ); + cudasafe(cudaMemcpy(angle_gpu,lens[1].ellipticity_angle , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy angle_gpu: " ); + cudasafe(cudaMemcpy(epot_gpu, lens[1].ellipticity_potential, Nlens[1]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy epot_gpu: " ); + cudasafe(cudaMemcpy(rcore_gpu, lens[1].rcore, Nlens[1]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy rcore_gpu: " ); + cudasafe(cudaMemcpy(rcut_gpu, lens[1].rcut, Nlens[1]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy rcut_gpu: " ); + cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy fame_gpu: " ); + + //printf( "%d %d",nBlocks_gpu,(nbgridcells) * (nbgridcells)/threadsPerBlock ); + + if (int((nbgridcells) * (nbgridcells)/threadsPerBlock) == 0){ + gradient_grid_piemd_GPU<<<1,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,Nlens[1], nbgridcells, lens_x_gpu, lens_y_gpu, b0_gpu, angle_gpu, epot_gpu, rcore_gpu, rcut_gpu); + } + else{ + gradient_grid_piemd_GPU<<<(nbgridcells) * (nbgridcells)/threadsPerBlock,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,Nlens[1], nbgridcells, lens_x_gpu, lens_y_gpu, b0_gpu, angle_gpu, epot_gpu, rcore_gpu, rcut_gpu); + } + cudasafe(cudaMemcpy( grid_grad_x, grid_grad_x_gpu, (nbgridcells) * (nbgridcells) *sizeof(double),cudaMemcpyDeviceToHost ),"Gradientgpu.cu : Copy source_x_gpu: " ); + cudasafe(cudaMemcpy( grid_grad_y, grid_grad_y_gpu, (nbgridcells) * (nbgridcells) *sizeof(double), cudaMemcpyDeviceToHost),"Gradientgpu.cu : Copy source_y_gpu: " ); + //printf ("%f %f \n", grid_grad_x[0],grid_grad_y[0]); + + // Free GPU memory + cudaFree(lens_x_gpu); + cudaFree(lens_y_gpu); + cudaFree(b0_gpu); + cudaFree(angle_gpu); + cudaFree(epot_gpu); + cudaFree(rcore_gpu); + cudaFree(rcut_gpu); + cudaFree(grid_grad_x_gpu); + cudaFree(grid_grad_y_gpu); + + for (int i = 0; i < nbgridcells; i++){ + for(int j = 0; j < nbgridcells; j++){ + printf(" %f",grid_grad_x[i*nbgridcells + j]); + } + printf("\n"); + } +/* + int nDevices; + + cudaGetDeviceCount(&nDevices); + for (int i = 0; i < nDevices; i++) { + cudaDeviceProp prop; + cudaGetDeviceProperties(&prop, i); + printf("Device Number: %d\n", i); + printf(" Device name: %s\n", prop.name); + printf(" Memory Clock Rate (KHz): %d\n", + prop.memoryClockRate); + printf(" Memory Bus Width (bits): %d\n", + prop.memoryBusWidth); + printf(" Peak Memory Bandwidth (GB/s): %f\n\n", + 2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6); + } + +*/ + + +} + +void gradient_grid_pinned_multiple(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int *Nlens,int nbgridcells){ + //get number of GPU devices + int nDevices; + cudaGetDeviceCount(&nDevices); + + // Initialise kernel variables, table for multiple devices + grid_param *frame_gpu[nDevices]; + double *lens_x_gpu[nDevices], *lens_y_gpu[nDevices], *b0_gpu[nDevices], *angle_gpu[nDevices], *epot_gpu[nDevices], *rcore_gpu[nDevices], *rcut_gpu[nDevices]; + double *grid_grad_x_gpu[nDevices], *grid_grad_y_gpu[nDevices] ; + + // Initialise multiple device variables + int Ndevice[nDevices], indexactual[nDevices]; + cudaStream_t stream[nDevices]; + + indexactual[0] = 0 ; + Ndevice[0] = (nbgridcells) * (nbgridcells)/nDevices; + printf("%d %d \n",indexactual[0], Ndevice[0]); + + for (int dev = 1; dev < nDevices; dev++) { + + Ndevice[dev] = (nbgridcells) * (nbgridcells)/nDevices; + + if(indexactual[dev]+Ndevice[dev] > (nbgridcells) * (nbgridcells)){ + Ndevice[dev] = (nbgridcells) * (nbgridcells) - indexactual[dev-1]; + } + + indexactual[dev] = indexactual[dev-1] + Ndevice[dev]; + printf("%d %d \n",indexactual[dev], Ndevice[dev]); + } + + for (int dev = 0; dev < nDevices; dev++) { + + + cudaSetDevice(dev); + + //SIS// + + + //PIEMD8// + + // Allocate variables on the GPU + cudasafe(cudaMalloc( (void**)&(lens_x_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc x_gpu: " ); + cudasafe(cudaMalloc( (void**)&(lens_y_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc y_gpu: " ); + cudasafe(cudaMalloc( (void**)&(b0_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc b0_gpu: " ); + cudasafe(cudaMalloc( (void**)&(angle_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc angle_gpu: " ); + cudasafe(cudaMalloc( (void**)&(epot_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc epot_gpu: " ); + cudasafe(cudaMalloc( (void**)&(rcore_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc rcore_gpu: " ); + cudasafe(cudaMalloc( (void**)&(rcut_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc rcut_gpu: " ); + cudasafe(cudaMalloc( (void**)&(frame_gpu[dev]), sizeof(grid_param)),"Gradientgpu.cu : Alloc frame_gpu: " ); + cudasafe(cudaMalloc( (void**)&(grid_grad_x_gpu[dev]), Ndevice[dev] *sizeof(double)),"Gradientgpu.cu : Alloc source_x_gpu: " ); + cudasafe(cudaMalloc( (void**)&(grid_grad_y_gpu[dev]), Ndevice[dev] *sizeof(double)),"Gradientgpu.cu : Alloc source_y_gpu: " ); + + + cudaStreamCreate(&stream[dev]); + + } + + for (int dev = 0; dev < nDevices; dev++) { + + + cudaSetDevice(dev); + + // Copy values to the GPU + cudasafe(cudaMemcpyAsync(lens_x_gpu[dev],lens[1].position_x , Nlens[1]*sizeof(double),cudaMemcpyHostToDevice,stream[dev] ),"Gradientgpu.cu : Copy x_gpu: " ); + cudasafe(cudaMemcpyAsync(lens_y_gpu[dev],lens[1].position_y , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice,stream[dev]),"Gradientgpu.cu : Copy y_gpu: " ); + cudasafe(cudaMemcpyAsync(b0_gpu[dev],lens[1].b0 , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice,stream[dev]),"Gradientgpu.cu : Copy b0_gpu: " ); + cudasafe(cudaMemcpyAsync(angle_gpu[dev],lens[1].ellipticity_angle , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice,stream[dev]),"Gradientgpu.cu : Copy angle_gpu: " ); + cudasafe(cudaMemcpyAsync(epot_gpu[dev], lens[1].ellipticity_potential, Nlens[1]*sizeof(double),cudaMemcpyHostToDevice ,stream[dev]),"Gradientgpu.cu : Copy epot_gpu: " ); + cudasafe(cudaMemcpyAsync(rcore_gpu[dev], lens[1].rcore, Nlens[1]*sizeof(double),cudaMemcpyHostToDevice ,stream[dev]),"Gradientgpu.cu : Copy rcore_gpu: " ); + cudasafe(cudaMemcpyAsync(rcut_gpu[dev], lens[1].rcut, Nlens[1]*sizeof(double), cudaMemcpyHostToDevice,stream[dev]),"Gradientgpu.cu : Copy rcut_gpu: " ); + cudasafe(cudaMemcpyAsync(frame_gpu[dev], frame, sizeof(grid_param), cudaMemcpyHostToDevice,stream[dev]),"Gradientgpu.cu : Copy fame_gpu: " ); + } + + for (int dev = 0; dev < nDevices; dev++) { + + + cudaSetDevice(dev); + int nBlocks_gpu = 0; + cudaDeviceProp properties_gpu; + cudaGetDeviceProperties(&properties_gpu, dev); // Get properties of 0th GPU in use + + if (properties_gpu.maxThreadsDim[0]<threadsPerBlock) + { + fprintf(stderr, "ERROR: The GPU has to support at least %u threads per block.\n", threadsPerBlock); + exit(-1); + } + + + if (int((nbgridcells) * (nbgridcells)/threadsPerBlock) == 0){ + gradient_grid_piemd_GPU_multiple<<<1,threadsPerBlock,0,stream[dev]>>>(grid_grad_x_gpu[dev], grid_grad_y_gpu[dev],frame_gpu[dev],Nlens[1], nbgridcells,Ndevice[dev],indexactual[dev], lens_x_gpu[dev], lens_y_gpu[dev], b0_gpu[dev], angle_gpu[dev], epot_gpu[dev], rcore_gpu[dev], rcut_gpu[dev]); + } + else{ + gradient_grid_piemd_GPU_multiple<<<(nbgridcells) * (nbgridcells)/threadsPerBlock,threadsPerBlock,0,stream[dev]>>>(grid_grad_x_gpu[dev], grid_grad_y_gpu[dev],frame_gpu[dev],Nlens[1], nbgridcells, Ndevice[dev],indexactual[dev], lens_x_gpu[dev], lens_y_gpu[dev], b0_gpu[dev], angle_gpu[dev], epot_gpu[dev], rcore_gpu[dev], rcut_gpu[dev]); + } + } + + for (int dev = 0; dev < nDevices; dev++) { + cudasafe(cudaMemcpyAsync( grid_grad_x + indexactual[dev], grid_grad_x_gpu[dev], Ndevice[dev] *sizeof(double),cudaMemcpyDeviceToHost ,stream[dev]),"Gradientgpu.cu : Copy source_x_gpu: " ); + cudasafe(cudaMemcpyAsync( grid_grad_y + indexactual[dev], grid_grad_y_gpu[dev], Ndevice[dev] *sizeof(double), cudaMemcpyDeviceToHost,stream[dev]),"Gradientgpu.cu : Copy source_y_gpu: " ); + } + + for (int dev = 0; dev < nDevices; dev++) { + + + cudaSetDevice(dev); + // Free GPU memory + cudaFree(lens_x_gpu[dev]); + cudaFree(lens_y_gpu[dev]); + cudaFree(b0_gpu[dev]); + cudaFree(angle_gpu[dev]); + cudaFree(epot_gpu[dev]); + cudaFree(rcore_gpu[dev]); + cudaFree(rcut_gpu[dev]); + cudaFree(grid_grad_x_gpu[dev]); + cudaFree(grid_grad_y_gpu[dev]); + cudaStreamDestroy(stream[dev]); + + } +/* + for (int i = 0; i < nbgridcells; i++){ + for(int j = 0; j < nbgridcells; j++){ + printf(" %f",grid_grad_x[i*nbgridcells + j]); + } + printf("\n"); + }*/ + + + +} + +__global__ void gradient_grid_kernel(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcells, const struct Potential_SOA *lens) { + + //*grad_x = *grad_y = 0.; + + int bid=blockIdx.x; // index of the block (and of the set of images) + int tid=threadIdx.x; // index of the thread within the block + + double dx,dy; //pixelsize + int grid_dim, index; + struct point image_point, Grad; + dx = (frame->xmax - frame->xmin)/(nbgridcells-1); + dy = (frame->ymax - frame->ymin)/(nbgridcells-1); + grid_dim = (nbgridcells); + + index = bid * threadsPerBlock + tid ; + + while(index < grid_dim*grid_dim){ + + grid_grad_x[index] = 0.; + grid_grad_y[index] = 0.; + + image_point.x = frame->xmin + (index/grid_dim)*dx; + image_point.y = frame->ymin + (index % grid_dim)*dy; + + Grad = module_potentialDerivatives_totalGradient_SOA_GPU(&image_point, lens, Nlens); + + grid_grad_x[index] = Grad.x; + grid_grad_y[index] = Grad.y; + + bid += gridDim.x; + index = bid * threadsPerBlock + tid; + } + + +} + + +__global__ void gradient_grid_sis_GPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcells, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut) { + + //*grad_x = *grad_y = 0.; + + int bid=blockIdx.x; // index of the block (and of the set of images) + int tid=threadIdx.x; // index of the thread within the block + + double dx,dy,x_pos,y_pos; //pixelsize + int grid_dim, index; + struct point true_coord, true_coord_rotation; + double R; + dx = (frame->xmax - frame->xmin)/(nbgridcells-1); + dy = (frame->ymax - frame->ymin)/(nbgridcells-1); + grid_dim = (nbgridcells); + + index = bid * threadsPerBlock + tid ; + + + + while(index < grid_dim*grid_dim){ + + grid_grad_x[index] = 0.; + grid_grad_y[index] = 0.; + + x_pos= frame->xmin + (index/grid_dim)*dx; + y_pos= frame->ymin + (index % grid_dim)*dy; + //printf("%f %f \n", x_pos, y_pos); + + for(int iterator=0; iterator < Nlens; iterator++){ + + true_coord.x = x_pos - lens_x[iterator]; + true_coord.y = y_pos - lens_y[iterator]; + + // Change the origin of the coordinate system to the center of the clump + true_coord_rotation = rotateCoordinateSystem_GPU(true_coord, lens_angle[iterator]); + + R=sqrt(true_coord_rotation.x*true_coord_rotation.x*(1-lens_epot[iterator])+true_coord_rotation.y*true_coord_rotation.y*(1+lens_epot[iterator])); //ellippot = ellipmass/3 + grid_grad_x[index] += (1-lens_epot[iterator])*lens_b0[iterator]*true_coord_rotation.x/(R); + grid_grad_y[index] += (1+lens_epot[iterator])*lens_b0[iterator]*true_coord_rotation.y/(R); + + + + } + + + + + bid += gridDim.x; + index = bid * threadsPerBlock + tid; + } + + +} + +__global__ void gradient_grid_piemd_GPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcells, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut) { + + //*grad_x = *grad_y = 0.; + + int bid=blockIdx.x; // index of the block (and of the set of images) + int tid=threadIdx.x; // index of the thread within the block + + double dx,dy,x_pos,y_pos; //pixelsize + int grid_dim, index; + struct point true_coord, true_coord_rotation; + complex zis; + dx = (frame->xmax - frame->xmin)/(nbgridcells-1); + dy = (frame->ymax - frame->ymin)/(nbgridcells-1); + grid_dim = (nbgridcells); + + index = bid * threadsPerBlock + tid ; + + + + while(index < grid_dim*grid_dim){ + + //grid_grad_x[index] = 0.; + //grid_grad_y[index] = 0.; + + x_pos= frame->xmin + (index/grid_dim)*dx; + y_pos= frame->ymin + (index % grid_dim)*dy; + //printf("%f %f \n", x_pos, y_pos); + + for(int iterator=0; iterator < Nlens; iterator++){ + + true_coord.x = x_pos - lens_x[iterator]; + true_coord.y = y_pos - lens_y[iterator]; + + // Change the origin of the coordinate system to the center of the clump + true_coord_rotation = rotateCoordinateSystem_GPU(true_coord, lens_angle[iterator]); + + double x = true_coord_rotation.x; + double y = true_coord_rotation.y; + double eps = lens_epot[iterator]; + double rc = lens_rcore[iterator]; + + double sqe = sqrt(eps); + // + double cx1 = (1. - eps)/(1. + eps); + double cxro = (1. + eps)*(1. + eps); + double cyro = (1. - eps)*(1. - eps); + // + double rem2 = x*x/cxro + y*y/cyro; + + complex zci, znum, zden, zres; + double norm; + // + zci.re = 0; + zci.im = -0.5*(1. - eps*eps)/sqe; + // + znum.re = cx1*x; + znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; + // + zden.re = x; + zden.im = 2.*rc*sqe - y; + norm = (zden.re*zden.re + zden.im*zden.im); // zis = znum/zden + // + zis.re = (znum.re*zden.re + znum.im*zden.im)/norm; + zis.im = (znum.im*zden.re - znum.re*zden.im)/norm; + norm = zis.re; + zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) + zis.im = atan2(zis.im, norm); + // norm = zis.re; + zres.re = zci.re*zis.re - zci.im*zis.im; // Re( zci*ln(zis) ) + zres.im = zci.im*zis.re + zis.im*zci.re; // Im( zci*ln(zis) ) + // + zis.re = zres.re; + zis.im = zres.im; + + grid_grad_x[index] += lens_b0[iterator]*zis.re; + grid_grad_y[index] += lens_b0[iterator]*zis.im; + + + + } + + + + + bid += gridDim.x; + index = bid * threadsPerBlock + tid; + } + + +} + +__global__ void gradient_grid_piemd_GPU_multiple(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcells, int Ndevice, int indexactual, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut) { + + //*grad_x = *grad_y = 0.; + + int bid=blockIdx.x; // index of the block (and of the set of images) + int tid=threadIdx.x; // index of the thread within the block + + double dx,dy,x_pos,y_pos; //pixelsize + int grid_dim, index; + struct point true_coord, true_coord_rotation; + complex zis; + dx = (frame->xmax - frame->xmin)/(nbgridcells-1); + dy = (frame->ymax - frame->ymin)/(nbgridcells-1); + grid_dim = (nbgridcells); + + index = bid * threadsPerBlock + tid ; + + + + while(index < Ndevice){ + //printf("%d \n", index); + + grid_grad_x[index] = 0.; + grid_grad_y[index] = 0.; + + x_pos= frame->xmin + ((indexactual +index)/grid_dim)*dx; + y_pos= frame->ymin + ((indexactual +index) % grid_dim)*dy; + //printf("%f %f \n", x_pos, y_pos); + + for(int iterator=0; iterator < Nlens; iterator++){ + + true_coord.x = x_pos - lens_x[iterator]; + true_coord.y = y_pos - lens_y[iterator]; + + // Change the origin of the coordinate system to the center of the clump + true_coord_rotation = rotateCoordinateSystem_GPU(true_coord, lens_angle[iterator]); + + double x = true_coord_rotation.x; + double y = true_coord_rotation.y; + double eps = lens_epot[iterator]; + double rc = lens_rcore[iterator]; + + double sqe = sqrt(eps); + // + double cx1 = (1. - eps)/(1. + eps); + double cxro = (1. + eps)*(1. + eps); + double cyro = (1. - eps)*(1. - eps); + // + double rem2 = x*x/cxro + y*y/cyro; + + complex zci, znum, zden, zres; + double norm; + // + zci.re = 0; + zci.im = -0.5*(1. - eps*eps)/sqe; + // + znum.re = cx1*x; + znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; + // + zden.re = x; + zden.im = 2.*rc*sqe - y; + norm = (zden.re*zden.re + zden.im*zden.im); // zis = znum/zden + // + zis.re = (znum.re*zden.re + znum.im*zden.im)/norm; + zis.im = (znum.im*zden.re - znum.re*zden.im)/norm; + norm = zis.re; + zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) + zis.im = atan2(zis.im, norm); + // norm = zis.re; + zres.re = zci.re*zis.re - zci.im*zis.im; // Re( zci*ln(zis) ) + zres.im = zci.im*zis.re + zis.im*zci.re; // Im( zci*ln(zis) ) + // + zis.re = zres.re; + zis.im = zres.im; + + grid_grad_x[index] += lens_b0[iterator]*zis.re; + grid_grad_y[index] += lens_b0[iterator]*zis.im; + + + + } + + + + + bid += gridDim.x; + + index = bid * threadsPerBlock + tid; + } + + +} + + + +__global__ void piemd_GPU(double *grad_x, double *grad_y, point *pImage, int Nlens, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut) { + + //*grad_x = *grad_y = 0.; + + int bid=blockIdx.x; // index of the block (and of the set of images) + int tid=threadIdx.x; // index of the thread within the block + + struct point true_coord, true_coord_rotation; + complex zis; + //gridDim.x , threadsPerBlock; + + int iterator = bid * threadsPerBlock + tid ; + //for(int iterator = 0; iterator < Nlens; ++iterator){ + while(iterator < Nlens){ + //printf(" %d %d %d %d \n",iterator , bid , threadsPerBlock , tid ); + true_coord.x = pImage->x - lens_x[iterator]; + true_coord.y = pImage->y - lens_y[iterator]; + + // Change the origin of the coordinate system to the center of the clump + true_coord_rotation = rotateCoordinateSystem_GPU(true_coord, lens_angle[iterator]); + + double x = true_coord_rotation.x; + double y = true_coord_rotation.y; + double eps = lens_epot[iterator]; + double rc = lens_rcore[iterator]; + + double sqe = sqrt(eps); + // + double cx1 = (1. - eps)/(1. + eps); + double cxro = (1. + eps)*(1. + eps); + double cyro = (1. - eps)*(1. - eps); + // + double rem2 = x*x/cxro + y*y/cyro; + + complex zci, znum, zden, zres; + double norm; + // + zci.re = 0; + zci.im = -0.5*(1. - eps*eps)/sqe; + // + znum.re = cx1*x; + znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; + // + zden.re = x; + zden.im = 2.*rc*sqe - y; + norm = (zden.re*zden.re + zden.im*zden.im); // zis = znum/zden + // + zis.re = (znum.re*zden.re + znum.im*zden.im)/norm; + zis.im = (znum.im*zden.re - znum.re*zden.im)/norm; + norm = zis.re; + zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) + zis.im = atan2(zis.im, norm); + // norm = zis.re; + zres.re = zci.re*zis.re - zci.im*zis.im; // Re( zci*ln(zis) ) + zres.im = zci.im*zis.re + zis.im*zci.re; // Im( zci*ln(zis) ) + // + zis.re = zres.re; + zis.im = zres.im; + // + //zres.re = zis.re*b0; + //zres.im = zis.im*b0; + // + // + + //grad->x += lens_b0[iterator]*zis.re; + //grad->y += lens_b0[iterator]*zis.im; + atomicAdd_double(grad_x,lens_b0[iterator]*zis.re); + atomicAdd_double(grad_y,lens_b0[iterator]*zis.im); + + //printf("%f %f \n ", lens_b0[iterator]*zis.re, lens_b0[iterator]*zis.im); + //printf("%f %f \n ", *grad_x, *grad_y); + + bid += gridDim.x; + iterator = bid * threadsPerBlock + tid ; + + } + + +} + +__device__ static double atomicAdd_double(double* address, double val) +{ + unsigned long long int* address_as_ull = + (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} + diff --git a/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cuh b/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cuh new file mode 100644 index 0000000..1f136fa --- /dev/null +++ b/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cuh @@ -0,0 +1,27 @@ +/* + * gradientgpu.cuh + * + * Created on: Nov 29, 2016 + * Author: cerschae + */ + +#ifndef GRID_GRADIENT_GPU_CUH_ +#define GRID_GRADIENT_GPU_CUH_ + +#include "cudafunctions.cuh" +#include "gradient_GPU.cuh" +#include <structure_hpc.h> + +void gradient_grid_GPU_sorted(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int Nlens,int nbgridcells); +void gradient_grid_pinned(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int *Nlens,int nbgridcell); +void gradient_grid_pinned_multiple(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int *Nlens,int nbgridcell); + +__global__ void gradient_grid_kernel(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcells, const struct Potential_SOA *lens); +__global__ void gradient_grid_piemd_GPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcell, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut); +__global__ void gradient_grid_sis_GPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcell, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut); +__global__ void gradient_grid_piemd_GPU_multiple(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcell, int Ndevice, int indexactual, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut); + +//__device__ struct point rotateCoordinateSystem_GPU(struct point P, double theta); +__device__ static double atomicAdd_double(double* address, double val); + +#endif /* GRADIENTGPU_CUH_ */ diff --git a/Benchmarks/GridGradientBenchmark/main.cpp b/Benchmarks/GridGradientBenchmark/main.cpp index 2977ffc..10f0476 100644 --- a/Benchmarks/GridGradientBenchmark/main.cpp +++ b/Benchmarks/GridGradientBenchmark/main.cpp @@ -1,254 +1,271 @@ - /** - * @file main.cpp - * @Author Christoph Schaaefer, EPFL (christophernstrerne.schaefer@epfl.ch) - * @date October 2016 - * @brief Benchmark for gradhalo function - */ - - #include <iostream> - #include <iomanip> - #include <string.h> - #include <math.h> - #include <sys/time.h> - #include <fstream> - #include <sys/stat.h> - // - #include <mm_malloc.h> - // - #include <structure_hpc.h> - #include "timer.h" - #include "gradient.hpp" - #include "chi_CPU.hpp" - #include "module_cosmodistances.h" - #include "module_readParameters.hpp" - #include<omp.h> - - int module_readCheckInput_readInput(int argc, char *argv[]) - { - /// check if there is a correct number of arguments, and store the name of the input file in infile - - char* infile; - struct stat file_stat; - - // If we do not have 3 arguments, stop - if ( argc != 3 ) - { - fprintf(stderr, "\nUnexpected number of arguments\n"); - fprintf(stderr, "\nUSAGE:\n"); - fprintf(stderr, "lenstool input_file output_directorypath [-n]\n\n"); - exit(-1); - } - else if ( argc == 3 ) - infile=argv[1]; - std::ifstream ifile(infile,std::ifstream::in); // Open the file - - - int ts = (int) time (NULL); - char buffer[10]; - std::stringstream ss; - ss << ts; - std::string trimstamp = ss.str(); - // - std::string outdir = argv[2]; - outdir += "-"; - outdir += trimstamp; - std::cout << outdir << std::endl; - - // check whether the output directory already exists - if (stat(outdir.c_str(), &file_stat) < 0){ - mkdir(outdir.c_str(), S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH ); - } - else { - printf("Error : Directory %s already exists. Specify a non existing directory.\n",argv[2]); - exit(-1); - } - - // check whether the input file exists. If it could not be opened (ifile = 0), it does not exist - if(ifile){ - ifile.close(); - } - else{ - printf("The file %s does not exist, please specify a valid file name\n",infile); +/** +* @file main.cpp +* @Author Christoph Schaaefer, EPFL (christophernstrerne.schaefer@epfl.ch) +* @date October 2016 +* @brief Benchmark for gradhalo function +*/ + +#include <iostream> +#include <iomanip> +#include <string.h> +#include <math.h> +#include <sys/time.h> +#include <fstream> +#include <sys/stat.h> +// +#include <mm_malloc.h> +// +#include <cuda_runtime.h> +#include <structure_hpc.h> +#include "timer.h" +#include "gradient.hpp" +#include "chi_CPU.hpp" +#include "module_cosmodistances.h" +#include "module_readParameters.hpp" +#include "grid_gradient_GPU.cuh" +#include<omp.h> + +int module_readCheckInput_readInput(int argc, char *argv[]) +{ +/// check if there is a correct number of arguments, and store the name of the input file in infile + +char* infile; +struct stat file_stat; + +// If we do not have 3 arguments, stop +if ( argc != 3 ) +{ + fprintf(stderr, "\nUnexpected number of arguments\n"); + fprintf(stderr, "\nUSAGE:\n"); + fprintf(stderr, "lenstool input_file output_directorypath [-n]\n\n"); + exit(-1); +} +else if ( argc == 3 ) + infile=argv[1]; +std::ifstream ifile(infile,std::ifstream::in); // Open the file + + +int ts = (int) time (NULL); +char buffer[10]; +std::stringstream ss; +ss << ts; +std::string trimstamp = ss.str(); +// +std::string outdir = argv[2]; +outdir += "-"; +outdir += trimstamp; +std::cout << outdir << std::endl; + +// check whether the output directory already exists +if (stat(outdir.c_str(), &file_stat) < 0){ + mkdir(outdir.c_str(), S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH ); +} +else { + printf("Error : Directory %s already exists. Specify a non existing directory.\n",argv[2]); exit(-1); - } +} - return 0; +// check whether the input file exists. If it could not be opened (ifile = 0), it does not exist +if(ifile){ +ifile.close(); +} +else{ +printf("The file %s does not exist, please specify a valid file name\n",infile); +exit(-1); } - int main(int argc, char *argv[]) - { - - - // Setting Up the problem - //=========================================================================================================== - - // This module function reads the terminal input when calling LENSTOOL and checks that it is correct - // Otherwise it exits LENSTOOL - module_readCheckInput_readInput(argc, argv); - - // This module function reads the cosmology parameters from the parameter file - // Input: struct cosmologicalparameters cosmology, parameter file - // Output: Initialized cosmology struct - cosmo_param cosmology; // Cosmology struct to store the cosmology data from the file - std::string inputFile = argv[1]; // Input file - module_readParameters_readCosmology(inputFile, cosmology); - - // This module function reads the runmode paragraph and the number of sources, arclets, etc. in the parameter file. - // The runmode_param stores the information of what exactly the user wants to do with lenstool. - struct runmode_param runmode; - module_readParameters_readRunmode(inputFile, &runmode); - - module_readParameters_debug_cosmology(runmode.debug, cosmology); - module_readParameters_debug_runmode(runmode.debug, runmode); - - - //=== Declaring variables - struct grid_param frame; - struct galaxy images[runmode.nimagestot]; - struct galaxy sources[runmode.nsets]; - struct Potential lenses[runmode.nhalos+runmode.npotfile-1]; - struct Potential_SOA lenses_SOA_table[NTYPES]; - struct Potential_SOA lenses_SOA; - struct cline_param cline; - struct potfile_param potfile; - struct Potential potfilepotentials[runmode.npotfile]; - struct potentialoptimization host_potentialoptimization[runmode.nhalos]; - int nImagesSet[runmode.nsets]; // Contains the number of images in each set of images - - // This module function reads in the potential form and its parameters (e.g. NFW) - // Input: input file - // Output: Potentials and its parameters - - - module_readParameters_Potential(inputFile, lenses, runmode.nhalos); - //Converts to SOA - //module_readParameters_PotentialSOA(inputFile, lenses, lenses_SOA, runmode.Nlens); - module_readParameters_PotentialSOA(inputFile, lenses, &lenses_SOA, runmode.nhalos); - //module_readParameters_PotentialSOA_nonsorted(inputFile, lenses, &lenses_SOA_nonsorted, runmode.nhalos); - module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos); - //std::cerr << lenses_SOA[1].b0[0] << " " << lenses[0].b0 << std::endl; - // This module function reads in the potfiles parameters - // Input: input file - // Output: Potentials from potfiles and its parameters - - if (runmode.potfile == 1 ){ - module_readParameters_readpotfiles_param(inputFile, &potfile); - module_readParameters_debug_potfileparam(runmode.debug, &potfile); - module_readParameters_readpotfiles(&runmode,&potfile,lenses); - module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos+runmode.npotfile); +return 0; +} - } +int main(int argc, char *argv[]) +{ + + +// Setting Up the problem +//=========================================================================================================== + +// This module function reads the terminal input when calling LENSTOOL and checks that it is correct +// Otherwise it exits LENSTOOL +module_readCheckInput_readInput(argc, argv); + +// This module function reads the cosmology parameters from the parameter file +// Input: struct cosmologicalparameters cosmology, parameter file +// Output: Initialized cosmology struct +cosmo_param cosmology; // Cosmology struct to store the cosmology data from the file +std::string inputFile = argv[1]; // Input file +module_readParameters_readCosmology(inputFile, cosmology); + +// This module function reads the runmode paragraph and the number of sources, arclets, etc. in the parameter file. +// The runmode_param stores the information of what exactly the user wants to do with lenstool. +struct runmode_param runmode; +module_readParameters_readRunmode(inputFile, &runmode); + +module_readParameters_debug_cosmology(runmode.debug, cosmology); +module_readParameters_debug_runmode(runmode.debug, runmode); + + +//=== Declaring variables +struct grid_param frame; +struct galaxy images[runmode.nimagestot]; +struct galaxy sources[runmode.nsets]; +struct Potential lenses[runmode.nhalos+runmode.npotfile-1]; +struct Potential_SOA lenses_SOA_table[NTYPES]; +struct Potential_SOA lenses_SOA; +struct cline_param cline; +struct potfile_param potfile; +struct Potential potfilepotentials[runmode.npotfile]; +struct potentialoptimization host_potentialoptimization[runmode.nhalos]; +int nImagesSet[runmode.nsets]; // Contains the number of images in each set of images + +// This module function reads in the potential form and its parameters (e.g. NFW) +// Input: input file +// Output: Potentials and its parameters + + +module_readParameters_Potential(inputFile, lenses, runmode.nhalos); +//Converts to SOA +//module_readParameters_PotentialSOA(inputFile, lenses, lenses_SOA, runmode.Nlens); +module_readParameters_PotentialSOA(inputFile, lenses, &lenses_SOA, runmode.nhalos); +//module_readParameters_PotentialSOA_nonsorted(inputFile, lenses, &lenses_SOA_nonsorted, runmode.nhalos); +module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos); +//std::cerr << lenses_SOA[1].b0[0] << " " << lenses[0].b0 << std::endl; +// This module function reads in the potfiles parameters +// Input: input file +// Output: Potentials from potfiles and its parameters + +if (runmode.potfile == 1 ){ + module_readParameters_readpotfiles_param(inputFile, &potfile); + module_readParameters_debug_potfileparam(runmode.debug, &potfile); + module_readParameters_readpotfiles(&runmode,&potfile,lenses); + module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos+runmode.npotfile); +} - // This module function reads in the grid form and its parameters - // Input: input file - // Output: grid and its parameters +// This module function reads in the grid form and its parameters +// Input: input file +// Output: grid and its parameters - module_readParameters_Grid(inputFile, &frame); +module_readParameters_Grid(inputFile, &frame); - if (runmode.image == 1 or runmode.inverse == 1 or runmode.time > 0){ - // This module function reads in the strong lensing images - module_readParameters_readImages(&runmode, images, nImagesSet); - //runmode.nsets = runmode.nimagestot; - for(int i = 0; i < runmode.nimagestot; ++i){ +if (runmode.image == 1 or runmode.inverse == 1 or runmode.time > 0){ - images[i].dls = module_cosmodistances_objectObject(lenses[0].z, images[i].redshift, cosmology); - images[i].dos = module_cosmodistances_observerObject(images[i].redshift, cosmology); - images[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, images[i].redshift, cosmology); + // This module function reads in the strong lensing images + module_readParameters_readImages(&runmode, images, nImagesSet); + //runmode.nsets = runmode.nimagestot; + for(int i = 0; i < runmode.nimagestot; ++i){ - } - module_readParameters_debug_image(runmode.debug, images, nImagesSet,runmode.nsets); + images[i].dls = module_cosmodistances_objectObject(lenses[0].z, images[i].redshift, cosmology); + images[i].dos = module_cosmodistances_observerObject(images[i].redshift, cosmology); + images[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, images[i].redshift, cosmology); } + module_readParameters_debug_image(runmode.debug, images, nImagesSet,runmode.nsets); - if (runmode.inverse == 1){ - - // This module function reads in the potential optimisation limits - module_readParameters_limit(inputFile,host_potentialoptimization,runmode.nhalos); - module_readParameters_debug_limit(runmode.debug, host_potentialoptimization[0]); - } +} +if (runmode.inverse == 1){ - if (runmode.source == 1){ - //Initialisation to default values.(Setting sources to z = 1.5 default value) - for(int i = 0; i < runmode.nsets; ++i){ - sources[i].redshift = 1.5; - } - // This module function reads in the strong lensing sources - module_readParameters_readSources(&runmode, sources); - //Calculating cosmoratios - for(int i = 0; i < runmode.nsets; ++i){ + // This module function reads in the potential optimisation limits + module_readParameters_limit(inputFile,host_potentialoptimization,runmode.nhalos); + module_readParameters_debug_limit(runmode.debug, host_potentialoptimization[0]); +} - sources[i].dls = module_cosmodistances_objectObject(lenses[0].z, sources[i].redshift, cosmology); - sources[i].dos = module_cosmodistances_observerObject(sources[i].redshift, cosmology); - sources[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, sources[i].redshift, cosmology); - } - module_readParameters_debug_source(runmode.debug, sources, runmode.nsets); +if (runmode.source == 1){ + //Initialisation to default values.(Setting sources to z = 1.5 default value) + for(int i = 0; i < runmode.nsets; ++i){ + sources[i].redshift = 1.5; + } + // This module function reads in the strong lensing sources + module_readParameters_readSources(&runmode, sources); + //Calculating cosmoratios + for(int i = 0; i < runmode.nsets; ++i){ + + sources[i].dls = module_cosmodistances_objectObject(lenses[0].z, sources[i].redshift, cosmology); + sources[i].dos = module_cosmodistances_observerObject(sources[i].redshift, cosmology); + sources[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, sources[i].redshift, cosmology); } + module_readParameters_debug_source(runmode.debug, sources, runmode.nsets); - std::cout << "--------------------------" << std::endl << std::endl; +} + +std::cout << "--------------------------" << std::endl << std::endl; + +double t_1(0),t_2(0),t_3(0),t_4(0); - double t_1(0),t_2(0),t_3(0),t_4(0); +// Lenstool-CPU Grid-Gradient +//=========================================================================================================== - // Lenstool-CPU Grid-Gradient - //=========================================================================================================== +//Setting Test: +double dx, dy; - //Setting Test: - double dx, dy; +dx = (frame.xmax - frame.xmin)/(runmode.nbgridcells-1); +dy = (frame.ymax - frame.ymin)/(runmode.nbgridcells-1); - dx = (frame.xmax - frame.xmin)/(runmode.nbgridcells-1); - dy = (frame.ymax - frame.ymin)/(runmode.nbgridcells-1); +point test_point1_1,test_point2_2, test_result1_1,test_result2_2; +double dlsds = images[0].dr; - point test_point1_1,test_point2_2, test_result1_1,test_result2_2; - double dlsds = images[0].dr; +test_point1_1.x = frame.xmin; +test_point1_1.y = frame.ymin; +test_point2_2.x = frame.xmin+dx; +test_point2_2.y = frame.ymin+dy; - test_point1_1.x = frame.xmin; - test_point1_1.y = frame.ymin; - test_point2_2.x = frame.xmin+dx; - test_point2_2.y = frame.ymin+dy; +test_result1_1 = module_potentialDerivatives_totalGradient_SOA(&test_point1_1, &lenses_SOA, runmode.nhalos); +test_result2_2 = module_potentialDerivatives_totalGradient_SOA(&test_point2_2, &lenses_SOA, runmode.nhalos); - test_result1_1 = module_potentialDerivatives_totalGradient_SOA(&test_point1_1, &lenses_SOA, runmode.nhalos); - test_result2_2 = module_potentialDerivatives_totalGradient_SOA(&test_point2_2, &lenses_SOA, runmode.nhalos); +std::cout << " Initial Test " << std::endl; +std::cout << " Test 1: " << std::endl; +std::cout << " Point 1 : " << std::setprecision(5) << test_point1_1.x << " "<< test_point1_1.y << std::endl; +std::cout << " Gradient " << std::setprecision(5) << test_result1_1.x << " "<< test_result1_1.y << std::endl; +std::cout << " Test 2: " << std::endl; +std::cout << " Point 2 : " << std::setprecision(5) << test_point2_2.x << " "<< test_point2_2.y << std::endl; +std::cout << " Gradient " << std::setprecision(5) << test_result2_2.x << " "<< test_result2_2.y << std::endl; - std::cout << " Initial Test " << std::endl; - std::cout << " Test 1: " << std::endl; - std::cout << " Point 1 : " << std::setprecision(5) << test_point1_1.x << " "<< test_point1_1.y << std::endl; - std::cout << " Gradient " << std::setprecision(5) << test_result1_1.x << " "<< test_result1_1.y << std::endl; - std::cout << " Test 2: " << std::endl; - std::cout << " Point 2 : " << std::setprecision(5) << test_point2_2.x << " "<< test_point2_2.y << std::endl; - std::cout << " Gradient " << std::setprecision(5) << test_result2_2.x << " "<< test_result2_2.y << std::endl; +double *grid_gradient_x, *grid_gradient_y; - double *grid_gradient_x, *grid_gradient_y; +grid_gradient_x = (double *)malloc((int) (runmode.nbgridcells) * (runmode.nbgridcells) * sizeof(double)); +grid_gradient_y = (double *)malloc((int) (runmode.nbgridcells) * (runmode.nbgridcells) * sizeof(double)); +int grid_dim = runmode.nbgridcells; - grid_gradient_x = (double *)malloc((int) (runmode.nbgridcells) * (runmode.nbgridcells) * sizeof(double)); - grid_gradient_y = (double *)malloc((int) (runmode.nbgridcells) * (runmode.nbgridcells) * sizeof(double)); - int grid_dim = runmode.nbgridcells; +#if 1 +t_2 = -myseconds(); +//Packaging the image to sourceplane conversion +gradient_grid_CPU(grid_gradient_x,grid_gradient_y,&frame,&lenses_SOA,runmode.nhalos,grid_dim); +t_2 += myseconds(); + +std::cout << " gradient_grid_CPU Brute Force Benchmark " << std::endl; +std::cout << " Test 1: " << std::endl; +std::cout << " Point 1 : " << std::setprecision(5) << test_point1_1.x << " "<< test_point1_1.y << std::endl; +std::cout << " Gradient " << std::setprecision(5) << test_result1_1.x << " "<< test_result1_1.y << std::endl; +std::cout << " Test 2: " << std::endl; +std::cout << " Point 2 : " << std::setprecision(5) << test_point2_2.x << " "<< test_point2_2.y << std::endl; +std::cout << " Gradient " << std::setprecision(5) << test_result2_2.x << " "<< test_result2_2.y << std::endl; +std::cout << " Time " << std::setprecision(15) << t_2 << std::endl; +#endif #if 1 - t_2 = -myseconds(); - //Packaging the image to sourceplane conversion - gradient_grid_CPU(grid_gradient_x,grid_gradient_y,&frame,&lenses_SOA,runmode.nhalos,grid_dim); - t_2 += myseconds(); - - std::cout << " gradient_grid_CPU Brute Force Benchmark " << std::endl; - std::cout << " Test 1: " << std::endl; - std::cout << " Point 1 : " << std::setprecision(5) << test_point1_1.x << " "<< test_point1_1.y << std::endl; - std::cout << " Gradient " << std::setprecision(5) << test_result1_1.x << " "<< test_result1_1.y << std::endl; - std::cout << " Test 2: " << std::endl; - std::cout << " Point 2 : " << std::setprecision(5) << test_point2_2.x << " "<< test_point2_2.y << std::endl; - std::cout << " Gradient " << std::setprecision(5) << test_result2_2.x << " "<< test_result2_2.y << std::endl; - std::cout << " Time " << std::setprecision(15) << t_2 << std::endl; +t_2 = -myseconds(); +gradient_grid_GPU_sorted(grid_gradient_x,grid_gradient_y,&frame,&lenses_SOA,runmode.nhalos,grid_dim); +t_2 += myseconds(); + +std::cout << " gradient_grid_GPU_sorted Brute Force Benchmark " << std::endl; +std::cout << " Test 1: " << std::endl; +std::cout << " Point 1 : " << std::setprecision(5) << test_point1_1.x << " "<< test_point1_1.y << std::endl; +std::cout << " Gradient " << std::setprecision(5) << test_result1_1.x << " "<< test_result1_1.y << std::endl; +std::cout << " Test 2: " << std::endl; +std::cout << " Point 2 : " << std::setprecision(5) << test_point2_2.x << " "<< test_point2_2.y << std::endl; +std::cout << " Gradient " << std::setprecision(5) << test_result2_2.x << " "<< test_result2_2.y << std::endl; +std::cout << " Time " << std::setprecision(15) << t_2 << std::endl; #endif }