diff --git a/Benchmarks/ConfigFiles/90Pot.par b/Benchmarks/ConfigFiles/90Pot.par
index df1aad4..1261aca 100644
--- a/Benchmarks/ConfigFiles/90Pot.par
+++ b/Benchmarks/ConfigFiles/90Pot.par
@@ -1,946 +1,946 @@
 #The user defines which mode of lenstool he wants to run here
 runmode
 	source 0 sources.cat										
 	image 1	../ConfigFiles/219Img.txt
 	#inverse 0
 	grid 1 22 1.5  									
 	poten 1 1000 1.5 
 	mass 1 1000 0.4 1.5
 	dpl 1 1000 1.5
 	amplif 1 1000 1.5
-	nbgridcells 1000
+	nbgridcells 5000
 	Debug   										# true activates debug mode
 	end
 frame  
 	#dmax 45										#either dmax or x and y have to be declared when a grid is used
 	xmin     -50.000
 	xmax     50.000
 	ymin     -50.000
 	ymax     50.000
 	end
 cosmology												
 	model	  1
 	H0        70.000
 	omegaM    0.300
 	omegaX    0.700
 	omegaK	  0.000
 	wA	  0.000
 	wX        -1.000
 	end
 potentiel 1
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     0.000								#X Position [arcsec]
 	y_centre     0.000								#Y Position [arcsec]
 	ellipticity     0.11					#Ellipticity
 	v_disp     800.									#Dispersion Velocity [km/s]
 	rcut	500										#Cut radius (PIEMD distribution)
 	rcore 	5										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 2
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     25.000								#X Position [arcsec]
 	y_centre     20.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     140.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 4
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     -16.000								#X Position [arcsec]
 	y_centre     2.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     130.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 5
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     -10.000								#X Position [arcsec]
 	y_centre     0.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     110.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 6
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     3.000								#X Position [arcsec]
 	y_centre     -10.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     80.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     120.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 potentiel 7
 	profil     8									#Profile: 5 SIS, 8 PIEMD
 	x_centre     17.000								#X Position [arcsec]
 	y_centre     -7.000								#Y Position [arcsec]
 	ellipticity     0.11							#Ellipticity
 	v_disp     1.									#Dispersion Velocity [km/s]
 	rcut	150										#Cut radius (PIEMD distribution)
 	rcore 	1										#Core radius (PIEMD distribution)
 	z_lens     0.4									#Redshift of lens
 	end
 limit 1
     x_centre           1 -1.0  1.0 0.01000
     y_centre           1 -1.0  1.0 0.010000
     ellipticity        1  0.05  0.75 0.01
     angle_pos          0 0. 180.0 0.1
     rcore     0 0.1 4. 0.100000
     rcut      0  1.0 20.0 0.10000
     v_disp          0 200.0 1000.0  0.10000
     end
 cline
 	nplan     1 1.5
 	dmax	50
 	limitLow	0.2
 	limitHigh	10
 	nbgridcells	1000
 	end
 finish
diff --git a/Benchmarks/GridGradientBenchmark/cudafunctions.cu b/Benchmarks/GridGradientBenchmark/cudafunctions.cu
new file mode 100644
index 0000000..9539941
--- /dev/null
+++ b/Benchmarks/GridGradientBenchmark/cudafunctions.cu
@@ -0,0 +1,39 @@
+/**
+* @file   cudafunctions.cpp
+* @Author Christoph Schaefer, EPFL (christophernstrerne.schaefer@epfl.ch)
+* @date   July 2015
+* @version 0,1
+* @brief  Functions for safe calling of cuda function and timing functions
+*
+*/
+
+
+
+
+
+
+/// Include
+///==========================================================================================================
+#include<stdio.h>
+#include<string>
+#include<strings.h>
+#include<stdlib.h>
+#include<iostream>
+#include<fstream>
+#include<math.h>
+#include<time.h>
+#include"cudafunctions.cuh"
+#include <cuda_runtime.h>
+#include<typeinfo>
+
+
+
+/** @brief Safe Cuda call function. Checks for error, prints and then stops the execution.
+*/
+void cudasafe( cudaError_t error, std::string message)
+{
+  if(error!=cudaSuccess) {
+     fprintf(stderr,"ERROR: %s : %s \n",message.c_str(),cudaGetErrorString(error));
+     exit(-1);
+  }
+}
diff --git a/Benchmarks/GridGradientBenchmark/cudafunctions.cuh b/Benchmarks/GridGradientBenchmark/cudafunctions.cuh
new file mode 100644
index 0000000..5208385
--- /dev/null
+++ b/Benchmarks/GridGradientBenchmark/cudafunctions.cuh
@@ -0,0 +1,33 @@
+/**
+* @file   cudafunctions.cuh
+* @Author Christoph Schaefer, EPFL (christophernstrerne.schaefer@epfl.ch)
+* @date   July 2015
+* @version 0,1
+* @brief  Header file for cudafunctions.cu
+*
+*/
+
+
+
+
+
+
+
+//Header guard
+#ifndef CUDAFUNCTIONS_CUH
+#define CUDAFUNCTIONS_CUH
+
+
+
+
+// Include
+//===========================================================================================================
+#include <strings.h>
+#include "structure.h"
+#include <iostream>
+
+void cudasafe( cudaError_t error, std::string message);
+
+
+//End header guard
+#endif
diff --git a/Benchmarks/GridGradientBenchmark/gradient_GPU.cu b/Benchmarks/GridGradientBenchmark/gradient_GPU.cu
new file mode 100644
index 0000000..c26d7af
--- /dev/null
+++ b/Benchmarks/GridGradientBenchmark/gradient_GPU.cu
@@ -0,0 +1,350 @@
+
+#include "gradient_GPU.cuh"
+
+#include <iostream>
+#include <string.h>
+#include <cuda_runtime.h>
+#include <math.h>
+#include <sys/time.h>
+#include <fstream>
+#include <immintrin.h>
+#include <map>
+/*
+#ifdef __AVX__
+#include "simd_math_avx.h"
+#endif
+#ifdef __AVX512F__
+#include "simd_math_avx512f.h"
+#endif
+*/
+#include "structure_hpc.h"
+#include "utils.hpp"
+
+
+
+//
+// SOA versions, vectorizable
+//
+__device__ struct point module_potentialDerivatives_totalGradient_5_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
+{
+        //asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA begins");
+  //
+  struct point grad, clumpgrad;
+        grad.x = 0;
+        grad.y = 0;
+  for(int i = shalos; i < shalos + nhalos; i++)
+  {
+    //
+    struct point true_coord, true_coord_rotation;
+    //
+    true_coord.x = pImage->x - lens->position_x[i];
+                true_coord.y = pImage->y - lens->position_y[i];
+    //
+    true_coord_rotation = rotateCoordinateSystem_GPU(true_coord, lens->ellipticity_angle[i]);
+    double R = sqrt(true_coord_rotation.x*true_coord_rotation.x*(1 - lens->ellipticity_potential[i])+true_coord_rotation.y*true_coord_rotation.y*(1 + lens->ellipticity_potential[i]));
+    //
+    grad.x += (1 - lens->ellipticity[i]/3.)*lens->b0[i]*true_coord_rotation.x/R;
+    grad.y += (1 + lens->ellipticity[i]/3.)*lens->b0[i]*true_coord_rotation.y/R;
+  }
+  return grad;
+}
+
+//
+//
+//
+__device__ struct point module_potentialDerivatives_totalGradient_8_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
+{
+  //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins");
+  // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
+  //
+  struct point grad, clumpgrad;
+  grad.x = 0;
+  grad.y = 0;
+  //
+  for(int i = shalos; i < shalos + nhalos; i++)
+  {
+    //IACA_START;
+    //
+    struct point true_coord, true_coord_rot; //, result;
+    //double       R, angular_deviation;
+    complex      zis;
+    //
+    //result.x = result.y = 0.;
+    //
+    true_coord.x = pImage->x - lens->position_x[i];
+    true_coord.y = pImage->y - lens->position_y[i];
+    double cosi,sinu;
+    cosi = cos(lens->ellipticity_angle[i]);
+    sinu = sin(lens->ellipticity_angle[i]);
+    //positionning at the potential center
+    // Change the origin of the coordinate system to the center of the clump
+    //true_coord_rot = rotateCoordinateSystem_GPU(true_coord, lens->ellipticity_angle[i]);
+    true_coord_rot = rotateCoordinateSystem_GPU_2(true_coord, cosi,sinu);
+    //
+    double x   = true_coord_rot.x;
+    double y   = true_coord_rot.y;
+    double eps = lens->ellipticity_potential[i];
+    double rc  = lens->rcore[i];
+    //
+    //std::cout << "piemd_lderivatives" << std::endl;
+    //
+    double sqe  = sqrt(eps);
+    //
+    double cx1  = (1. - eps)/(1. + eps);
+    double cxro = (1. + eps)*(1. + eps);
+    double cyro = (1. - eps)*(1. - eps);
+    //
+    double rem2 = x*x/cxro + y*y/cyro;
+    //
+    complex zci, znum, zden, zres;
+    double norm;
+    //
+    zci.re  = 0;
+    zci.im  = -0.5*(1. - eps*eps)/sqe;
+    //
+    znum.re = cx1*x;
+    znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1;
+    //
+    zden.re = x;
+    zden.im = 2.*rc*sqe - y;
+    norm    = (zden.re*zden.re + zden.im*zden.im);     // zis = znum/zden
+    //
+    zis.re  = (znum.re*zden.re + znum.im*zden.im)/norm;
+    zis.im  = (znum.im*zden.re - znum.re*zden.im)/norm;
+    norm    = zis.re;
+    zis.re  = log(sqrt(norm*norm + zis.im*zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
+    zis.im  = atan2(zis.im, norm);
+    //  norm = zis.re;
+    zres.re = zci.re*zis.re - zci.im*zis.im;   // Re( zci*ln(zis) )
+    zres.im = zci.im*zis.re + zis.im*zci.re;   // Im( zci*ln(zis) )
+    //
+    zis.re  = zres.re;
+    zis.im  = zres.im;
+    //
+    //zres.re = zis.re*b0;
+    //zres.im = zis.im*b0;
+    // rotation
+    clumpgrad.x = zis.re;
+    clumpgrad.y = zis.im;
+    clumpgrad = rotateCoordinateSystem_GPU_2(clumpgrad, cosi,-sinu);
+    //
+    clumpgrad.x = lens->b0[i]*clumpgrad.x;
+    clumpgrad.y = lens->b0[i]*clumpgrad.y;
+    //
+    //clumpgrad.x = lens->b0[i]*zis.re;
+    //clumpgrad.y = lens->b0[i]*zis.im;
+    //nan check
+    //if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y)
+    //{
+      // add the gradients
+    grad.x += clumpgrad.x;
+    grad.y += clumpgrad.y;
+    //}
+  }
+  //IACA_END;
+  //
+  return(grad);
+}
+
+
+__device__ struct point module_potentialDerivatives_totalGradient_81_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
+{
+        //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins");
+  //std::cout << "# module_potentialDerivatives_totalGradient_SOA begins" << std::endl;
+        // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
+        //
+        struct point grad, clumpgrad;
+        grad.x = 0;
+        grad.y = 0;
+        //
+        for(int i = shalos; i < shalos + nhalos; i++)
+        {
+                //IACA_START;
+                //
+                struct point true_coord, true_coord_rot; //, result;
+                //double       R, angular_deviation;
+                complex      zis;
+                //
+                //result.x = result.y = 0.;
+                //
+                true_coord.x = pImage->x - lens->position_x[i];
+                true_coord.y = pImage->y - lens->position_y[i];
+                //positionning at the potential center
+                // Change the origin of the coordinate system to the center of the clump
+                true_coord_rot = rotateCoordinateSystem_GPU(true_coord, lens->ellipticity_angle[i]);
+                //
+                double x    = true_coord_rot.x;
+                double y    = true_coord_rot.y;
+                double eps  = lens->ellipticity_potential[i];
+                double rc   = lens->rcore[i];
+                double rcut = lens->rcut[i];
+    double b0   = lens->b0[i];
+    double t05  = b0*rcut/(rcut - rc);
+    //printf("b0 = %f, rcut = %f, rc = %f, t05 = %f\n", b0, rcut, rc, t05);
+                //
+                //std::cout << "piemd_lderivatives" << std::endl;
+                //
+                double sqe  = sqrt(eps);
+                //
+                double cx1  = (1. - eps)/(1. + eps);
+                double cxro = (1. + eps)*(1. + eps);
+                double cyro = (1. - eps)*(1. - eps);
+                //
+                double rem2 = x*x/cxro + y*y/cyro;
+                //
+                complex zci, znum, zden, zres_rc, zres_rcut;
+                double norm;
+                //
+                zci.re  = 0;
+                zci.im  = -0.5*(1. - eps*eps)/sqe;
+                //
+    // step 1
+    //
+    {
+      znum.re = cx1*x;
+      znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1;
+      //
+      zden.re = x;
+      zden.im = 2.*rc*sqe - y;
+      norm    = (zden.re*zden.re + zden.im*zden.im);     // zis = znum/zden
+      //
+      zis.re  = (znum.re*zden.re + znum.im*zden.im)/norm;
+      zis.im  = (znum.im*zden.re - znum.re*zden.im)/norm;
+      norm    = zis.re;
+      zis.re  = log(sqrt(norm*norm + zis.im*zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
+      zis.im  = atan2(zis.im, norm);
+      //  norm = zis.re;
+      zres_rc.re = zci.re*zis.re - zci.im*zis.im;   // Re( zci*ln(zis) )
+      zres_rc.im = zci.im*zis.re + zis.im*zci.re;   // Im( zci*ln(zis) )
+    }
+    //
+    // step 2
+    //
+    {
+                        znum.re = cx1*x;
+                        znum.im = 2.*sqe*sqrt(rcut*rcut + rem2) - y/cx1;
+                        //
+                        zden.re = x;
+                        zden.im = 2.*rcut*sqe - y;
+                        norm    = (zden.re*zden.re + zden.im*zden.im);     // zis = znum/zden
+                        //
+                        zis.re  = (znum.re*zden.re + znum.im*zden.im)/norm;
+                        zis.im  = (znum.im*zden.re - znum.re*zden.im)/norm;
+                        norm    = zis.re;
+                        zis.re  = log(sqrt(norm*norm + zis.im*zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
+                        zis.im  = atan2(zis.im, norm);
+                        //  norm = zis.re;
+                        zres_rcut.re = zci.re*zis.re - zci.im*zis.im;   // Re( zci*ln(zis) )
+                        zres_rcut.im = zci.im*zis.re + zis.im*zci.re;   // Im( zci*ln(zis) )
+                }
+    zis.re  = t05*(zres_rc.re - zres_rcut.re);
+    zis.im  = t05*(zres_rc.im - zres_rcut.im);
+    //printf("%f %f\n", zis.re, zis.im);
+                //
+                //zres.re = zis.re*b0;
+                //zres.im = zis.im*b0;
+                // rotation
+                clumpgrad.x = zis.re;
+                clumpgrad.y = zis.im;
+                clumpgrad = rotateCoordinateSystem_GPU(clumpgrad, -lens->ellipticity_angle[i]);
+                //
+                //clumpgrad.x = lens->b0[i]*clumpgrad.x;
+                //clumpgrad.y = lens->b0[i]*clumpgrad.y;
+                //
+                //clumpgrad.x = lens->b0[i]*zis.re;
+                //clumpgrad.y = lens->b0[i]*zis.im;
+                //nan check
+                //if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y)
+                //{
+                        // add the gradients
+                grad.x += clumpgrad.x;
+                grad.y += clumpgrad.y;
+    //printf("grad = %f %f\n", clumpgrad.x, clumpgrad.y);
+                //}
+        }
+        //IACA_END;
+        //
+        return(grad);
+}
+//
+//
+//
+
+typedef struct point (*halo_func_GPU_t) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos);
+
+__constant__ halo_func_GPU_t halo_func_GPU[100] =
+{
+0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_5_SOA_GPU, 0, 0, module_potentialDerivatives_totalGradient_8_SOA_GPU,  0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0,  module_potentialDerivatives_totalGradient_81_SOA_GPU, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+//
+//
+//
+__device__ struct point module_potentialDerivatives_totalGradient_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int nhalos)
+{
+        struct point grad, clumpgrad;
+        //
+        grad.x = clumpgrad.x = 0;
+        grad.y = clumpgrad.y = 0;
+  //
+  int shalos = 0;
+  //
+  //module_potentialDerivatives_totalGradient_81_SOA(pImage, lens, 0, nhalos);
+  //return;
+  /*
+  int* p_type = &(lens->type)[0];
+  int* lens_type = (int*) malloc(nhalos*sizeof(int));
+  memcpy(lens_type, &(lens->type)[0], nhalos*sizeof(int));
+
+  //quicksort(lens_type, nhalos);
+  //*/
+
+  //printf ("%f \n",lens->position_x[shalos]);
+
+
+  while (shalos < nhalos)
+  {
+
+    int lens_type = lens->type[shalos];
+    int count     = 1;
+    while (lens->type[shalos + count] == lens_type) count++;
+    //std::cerr << "type = " << lens_type << " " << count << " " << shalos << std::endl;
+    //printf ("%d %d %d \n",lens_type,count,shalos);
+    //
+    clumpgrad = (*halo_func_GPU[lens_type])(pImage, lens, shalos, count);
+    //
+    grad.x += clumpgrad.x;
+    grad.y += clumpgrad.y;
+    shalos += count;
+  }
+
+        return(grad);
+}
+
+__device__ inline struct point rotateCoordinateSystem_GPU(struct point P, double theta)
+{
+  struct  point   Q;
+
+  Q.x = P.x*cos(theta) + P.y*sin(theta);
+  Q.y = P.y*cos(theta) - P.x*sin(theta);
+
+  return(Q);
+}
+
+__device__ inline struct point rotateCoordinateSystem_GPU_2(struct point P, double cosi, double sinu)
+{
+  struct  point   Q;
+
+  Q.x = P.x*cosi + P.y*sinu;
+  Q.y = P.y*cosi - P.x*sinu;
+
+  return(Q);
+}
diff --git a/Benchmarks/GridGradientBenchmark/gradient_GPU.cuh b/Benchmarks/GridGradientBenchmark/gradient_GPU.cuh
new file mode 100644
index 0000000..4cb3d20
--- /dev/null
+++ b/Benchmarks/GridGradientBenchmark/gradient_GPU.cuh
@@ -0,0 +1,20 @@
+/*
+ * gradient_GPU.cuh
+ *
+ *  Created on: Feb 1, 2017
+ *      Author: cerschae
+ */
+
+#ifndef GRADIENT_GPU_CUH_
+#define GRADIENT_GPU_CUH_
+
+#include "cudafunctions.cuh"
+#include <fstream>
+#include <structure_hpc.h>
+
+__device__ struct point module_potentialDerivatives_totalGradient_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int nhalos);
+
+__device__ inline struct point rotateCoordinateSystem_GPU(struct point P, double theta);
+__device__ inline struct point rotateCoordinateSystem_GPU_2(struct point P, double cosi, double sinu);
+
+#endif /* GRADIENT_GPU_CUH_ */
diff --git a/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cu b/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cu
new file mode 100644
index 0000000..a642baf
--- /dev/null
+++ b/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cu
@@ -0,0 +1,760 @@
+#include <fstream>
+#include "grid_gradient_GPU.cuh"
+
+
+
+void gradient_grid_GPU_sorted(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos ,int nbgridcells){
+
+
+  int nBlocks_gpu = 0;
+  // Define the number of threads per block the GPU will use
+  cudaDeviceProp properties_gpu;
+
+  cudaGetDeviceProperties(&properties_gpu, 0); // Get properties of 0th GPU in use
+
+  if (properties_gpu.maxThreadsDim[0]<threadsPerBlock)
+    {
+    fprintf(stderr, "ERROR: The GPU has to support at least %u threads per block.\n", threadsPerBlock);
+    exit(-1);
+    }
+  else
+    {
+    nBlocks_gpu = properties_gpu.maxGridSize[0] / threadsPerBlock;  // Get the maximum number of blocks with the chosen number of threads
+                    // per Block that the GPU supports
+    }
+
+  grid_param *frame_gpu;
+  Potential_SOA *lens_gpu,*lens_kernel ;
+  int *type_gpu;
+  double *lens_x_gpu, *lens_y_gpu, *b0_gpu, *angle_gpu, *epot_gpu, *rcore_gpu, *rcut_gpu;
+  double *grid_grad_x_gpu, *grid_grad_y_gpu ;
+
+  lens_gpu = (Potential_SOA *) malloc(sizeof(Potential_SOA));
+  lens_gpu->type = (int *) malloc(sizeof(int));
+
+  // Allocate variables on the GPU
+  cudasafe(cudaMalloc( (void**)&(lens_kernel), sizeof(Potential_SOA)),"Gradientgpu.cu : Alloc Potential_SOA: " );
+  cudasafe(cudaMalloc( (void**)&(type_gpu), nhalos*sizeof(int)),"Gradientgpu.cu : Alloc type_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(lens_x_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc x_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(lens_y_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc y_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(b0_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc b0_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(angle_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc angle_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(epot_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc epot_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(rcore_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc rcore_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(rcut_gpu), nhalos*sizeof(double)),"Gradientgpu.cu : Alloc rcut_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradientgpu.cu : Alloc frame_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(grid_grad_x_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_x_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(grid_grad_y_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_y_gpu: " );
+
+  // Copy values to the GPU
+  cudasafe(cudaMemcpy(type_gpu,lens->type , nhalos*sizeof(int),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy type_gpu: " );
+  cudasafe(cudaMemcpy(lens_x_gpu,lens->position_x , nhalos*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy x_gpu: " );
+  cudasafe(cudaMemcpy(lens_y_gpu,lens->position_y , nhalos*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy y_gpu: " );
+  cudasafe(cudaMemcpy(b0_gpu,lens->b0 , nhalos*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy b0_gpu: " );
+  cudasafe(cudaMemcpy(angle_gpu,lens->ellipticity_angle , nhalos*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy angle_gpu: " );
+  cudasafe(cudaMemcpy(epot_gpu, lens->ellipticity_potential, nhalos*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy epot_gpu: " );
+  cudasafe(cudaMemcpy(rcore_gpu, lens->rcore, nhalos*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy rcore_gpu: " );
+  cudasafe(cudaMemcpy(rcut_gpu, lens->rcut, nhalos*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy rcut_gpu: " );
+  cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy fame_gpu: " );
+
+
+  //printf("%p \n", lens_gpu);
+  //printf("%p \n", type_gpu);
+  //printf("%p \n", lens_gpu->type);
+  //fflush(stdout);
+  lens_gpu->type = type_gpu;
+  lens_gpu->position_x = lens_x_gpu;
+  lens_gpu->position_y = lens_y_gpu;
+  lens_gpu->b0 = b0_gpu;
+  lens_gpu->ellipticity_angle = angle_gpu;
+  lens_gpu->ellipticity_potential = epot_gpu;
+  lens_gpu->rcore = rcore_gpu;
+  lens_gpu->rcut = rcut_gpu;
+
+  cudaMemcpy(lens_kernel, lens_gpu, sizeof(Potential_SOA), cudaMemcpyHostToDevice);
+
+
+  if (int((nbgridcells) * (nbgridcells)/threadsPerBlock) == 0){
+    gradient_grid_kernel<<<1,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,nhalos, nbgridcells, lens_kernel);
+  }
+  else{
+    gradient_grid_kernel<<<(nbgridcells) * (nbgridcells)/threadsPerBlock,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,nhalos, nbgridcells, lens_kernel);
+  }
+  cudasafe(cudaMemcpy( grid_grad_x, grid_grad_x_gpu, (nbgridcells) * (nbgridcells) *sizeof(double),cudaMemcpyDeviceToHost ),"Gradientgpu.cu : Copy source_x_gpu: " );
+  cudasafe(cudaMemcpy( grid_grad_y, grid_grad_y_gpu, (nbgridcells) * (nbgridcells) *sizeof(double), cudaMemcpyDeviceToHost),"Gradientgpu.cu : Copy source_y_gpu: " );
+
+
+  //printf("%f %f \n",grid_grad_x[0],grid_grad_y[0]);
+
+    // Free GPU memory
+  cudaFree(lens_gpu);
+  cudaFree(type_gpu);
+  cudaFree(lens_x_gpu);
+  cudaFree(lens_y_gpu);
+  cudaFree(b0_gpu);
+  cudaFree(angle_gpu);
+  cudaFree(epot_gpu);
+  cudaFree(rcore_gpu);
+  cudaFree(rcut_gpu);
+  cudaFree(grid_grad_x_gpu);
+  cudaFree(grid_grad_y_gpu);
+
+/*
+  for (int i = 0; i < nbgridcells; i++){
+    for(int j = 0; j < nbgridcells; j++){
+      printf(" %f",grid_grad_x[i*nbgridcells + j]);
+    }
+    printf("\n");
+  }*/
+
+}
+
+
+void gradient_grid_pinned(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int *Nlens,int nbgridcells){
+
+
+  int nBlocks_gpu = 0;
+  // Define the number of threads per block the GPU will use
+  cudaDeviceProp properties_gpu;
+
+  cudaGetDeviceProperties(&properties_gpu, 0); // Get properties of 0th GPU in use
+
+  if (properties_gpu.maxThreadsDim[0]<threadsPerBlock)
+    {
+    fprintf(stderr, "ERROR: The GPU has to support at least %u threads per block.\n", threadsPerBlock);
+    exit(-1);
+    }
+  else
+    {
+    nBlocks_gpu = properties_gpu.maxGridSize[0] / threadsPerBlock;  // Get the maximum number of blocks with the chosen number of threads
+                    // per Block that the GPU supports
+    }
+
+  grid_param *frame_gpu;
+  double *lens_x_gpu, *lens_y_gpu, *b0_gpu, *angle_gpu, *epot_gpu, *rcore_gpu, *rcut_gpu;
+  double *grid_grad_x_gpu, *grid_grad_y_gpu ;
+
+  //SIS//
+
+  // Allocate variables on the GPU
+  cudasafe(cudaMalloc( (void**)&(lens_x_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc x_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(lens_y_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc y_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(b0_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc b0_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(angle_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc angle_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(epot_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc epot_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(rcore_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc rcore_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(rcut_gpu), Nlens[0]*sizeof(double)),"Gradientgpu.cu : Alloc rcut_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradientgpu.cu : Alloc frame_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(grid_grad_x_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_x_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(grid_grad_y_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_y_gpu: " );
+
+  // Copy values to the GPU
+  cudasafe(cudaMemcpy(lens_x_gpu,lens[0].position_x , Nlens[0]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy x_gpu: " );
+  cudasafe(cudaMemcpy(lens_y_gpu,lens[0].position_y , Nlens[0]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy y_gpu: " );
+  cudasafe(cudaMemcpy(b0_gpu,lens[0].b0 , Nlens[0]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy b0_gpu: " );
+  cudasafe(cudaMemcpy(angle_gpu,lens[0].ellipticity_angle , Nlens[0]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy angle_gpu: " );
+  cudasafe(cudaMemcpy(epot_gpu, lens[0].ellipticity_potential, Nlens[0]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy epot_gpu: " );
+  cudasafe(cudaMemcpy(rcore_gpu, lens[0].rcore, Nlens[0]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy rcore_gpu: " );
+  cudasafe(cudaMemcpy(rcut_gpu, lens[0].rcut, Nlens[0]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy rcut_gpu: " );
+  cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy fame_gpu: " );
+
+  //printf( "%d %d",nBlocks_gpu,(nbgridcells) * (nbgridcells)/threadsPerBlock );
+
+  if (int((nbgridcells) * (nbgridcells)/threadsPerBlock) == 0){
+    gradient_grid_sis_GPU<<<1,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,Nlens[0], nbgridcells, lens_x_gpu, lens_y_gpu, b0_gpu, angle_gpu, epot_gpu, rcore_gpu, rcut_gpu);
+  }
+  else{
+  gradient_grid_sis_GPU<<<(nbgridcells) * (nbgridcells)/threadsPerBlock,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,Nlens[0], nbgridcells, lens_x_gpu, lens_y_gpu, b0_gpu, angle_gpu, epot_gpu, rcore_gpu, rcut_gpu);
+  }
+  cudasafe(cudaMemcpy( grid_grad_x, grid_grad_x_gpu, (nbgridcells) * (nbgridcells) *sizeof(double),cudaMemcpyDeviceToHost ),"Gradientgpu.cu : Copy source_x_gpu: " );
+  cudasafe(cudaMemcpy( grid_grad_y, grid_grad_y_gpu, (nbgridcells) * (nbgridcells) *sizeof(double), cudaMemcpyDeviceToHost),"Gradientgpu.cu : Copy source_y_gpu: " );
+  //printf ("%f %f \n", grid_grad_x[0],grid_grad_y[0]);
+
+    // Free GPU memory
+  cudaFree(lens_x_gpu);
+  cudaFree(lens_y_gpu);
+  cudaFree(b0_gpu);
+  cudaFree(angle_gpu);
+  cudaFree(epot_gpu);
+  cudaFree(rcore_gpu);
+  cudaFree(rcut_gpu);
+  cudaFree(grid_grad_x_gpu);
+  cudaFree(grid_grad_y_gpu);
+
+
+  //PIEMD8//
+
+  // Allocate variables on the GPU
+  cudasafe(cudaMalloc( (void**)&(lens_x_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc x_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(lens_y_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc y_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(b0_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc b0_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(angle_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc angle_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(epot_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc epot_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(rcore_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc rcore_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(rcut_gpu), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc rcut_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradientgpu.cu : Alloc frame_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(grid_grad_x_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_x_gpu: " );
+  cudasafe(cudaMalloc( (void**)&(grid_grad_y_gpu), (nbgridcells) * (nbgridcells) *sizeof(double)),"Gradientgpu.cu : Alloc source_y_gpu: " );
+
+  // Copy values to the GPU
+  cudasafe(cudaMemcpy(lens_x_gpu,lens[1].position_x , Nlens[1]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy x_gpu: " );
+  cudasafe(cudaMemcpy(lens_y_gpu,lens[1].position_y , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy y_gpu: " );
+  cudasafe(cudaMemcpy(b0_gpu,lens[1].b0 , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy b0_gpu: " );
+  cudasafe(cudaMemcpy(angle_gpu,lens[1].ellipticity_angle , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy angle_gpu: " );
+  cudasafe(cudaMemcpy(epot_gpu, lens[1].ellipticity_potential, Nlens[1]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy epot_gpu: " );
+  cudasafe(cudaMemcpy(rcore_gpu, lens[1].rcore, Nlens[1]*sizeof(double),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy rcore_gpu: " );
+  cudasafe(cudaMemcpy(rcut_gpu, lens[1].rcut, Nlens[1]*sizeof(double), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy rcut_gpu: " );
+  cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy fame_gpu: " );
+
+  //printf( "%d %d",nBlocks_gpu,(nbgridcells) * (nbgridcells)/threadsPerBlock );
+
+  if (int((nbgridcells) * (nbgridcells)/threadsPerBlock) == 0){
+    gradient_grid_piemd_GPU<<<1,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,Nlens[1], nbgridcells, lens_x_gpu, lens_y_gpu, b0_gpu, angle_gpu, epot_gpu, rcore_gpu, rcut_gpu);
+  }
+  else{
+  gradient_grid_piemd_GPU<<<(nbgridcells) * (nbgridcells)/threadsPerBlock,threadsPerBlock>>>(grid_grad_x_gpu, grid_grad_y_gpu,frame_gpu,Nlens[1], nbgridcells, lens_x_gpu, lens_y_gpu, b0_gpu, angle_gpu, epot_gpu, rcore_gpu, rcut_gpu);
+  }
+  cudasafe(cudaMemcpy( grid_grad_x, grid_grad_x_gpu, (nbgridcells) * (nbgridcells) *sizeof(double),cudaMemcpyDeviceToHost ),"Gradientgpu.cu : Copy source_x_gpu: " );
+  cudasafe(cudaMemcpy( grid_grad_y, grid_grad_y_gpu, (nbgridcells) * (nbgridcells) *sizeof(double), cudaMemcpyDeviceToHost),"Gradientgpu.cu : Copy source_y_gpu: " );
+  //printf ("%f %f \n", grid_grad_x[0],grid_grad_y[0]);
+
+    // Free GPU memory
+  cudaFree(lens_x_gpu);
+  cudaFree(lens_y_gpu);
+  cudaFree(b0_gpu);
+  cudaFree(angle_gpu);
+  cudaFree(epot_gpu);
+  cudaFree(rcore_gpu);
+  cudaFree(rcut_gpu);
+  cudaFree(grid_grad_x_gpu);
+  cudaFree(grid_grad_y_gpu);
+
+  for (int i = 0; i < nbgridcells; i++){
+    for(int j = 0; j < nbgridcells; j++){
+      printf(" %f",grid_grad_x[i*nbgridcells + j]);
+    }
+    printf("\n");
+  }
+/*
+  int nDevices;
+
+    cudaGetDeviceCount(&nDevices);
+    for (int i = 0; i < nDevices; i++) {
+      cudaDeviceProp prop;
+      cudaGetDeviceProperties(&prop, i);
+      printf("Device Number: %d\n", i);
+      printf("  Device name: %s\n", prop.name);
+      printf("  Memory Clock Rate (KHz): %d\n",
+             prop.memoryClockRate);
+      printf("  Memory Bus Width (bits): %d\n",
+             prop.memoryBusWidth);
+      printf("  Peak Memory Bandwidth (GB/s): %f\n\n",
+             2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6);
+    }
+
+*/
+
+
+}
+
+void gradient_grid_pinned_multiple(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int *Nlens,int nbgridcells){
+  //get number of GPU devices
+  int nDevices;
+  cudaGetDeviceCount(&nDevices);
+
+  // Initialise kernel variables, table for multiple devices
+  grid_param *frame_gpu[nDevices];
+  double *lens_x_gpu[nDevices], *lens_y_gpu[nDevices], *b0_gpu[nDevices], *angle_gpu[nDevices], *epot_gpu[nDevices], *rcore_gpu[nDevices], *rcut_gpu[nDevices];
+  double *grid_grad_x_gpu[nDevices], *grid_grad_y_gpu[nDevices] ;
+
+  // Initialise multiple device variables
+  int Ndevice[nDevices], indexactual[nDevices];
+  cudaStream_t stream[nDevices];
+
+  indexactual[0] = 0 ;
+  Ndevice[0] = (nbgridcells) * (nbgridcells)/nDevices;
+  printf("%d %d \n",indexactual[0], Ndevice[0]);
+
+  for (int dev = 1; dev < nDevices; dev++) {
+
+    Ndevice[dev] = (nbgridcells) * (nbgridcells)/nDevices;
+
+    if(indexactual[dev]+Ndevice[dev] > (nbgridcells) * (nbgridcells)){
+      Ndevice[dev] = (nbgridcells) * (nbgridcells) - indexactual[dev-1];
+    }
+
+    indexactual[dev] = indexactual[dev-1] + Ndevice[dev];
+    printf("%d %d \n",indexactual[dev], Ndevice[dev]);
+  }
+
+  for (int dev = 0; dev < nDevices; dev++) {
+
+
+    cudaSetDevice(dev);
+
+    //SIS//
+
+
+    //PIEMD8//
+
+    // Allocate variables on the GPU
+    cudasafe(cudaMalloc( (void**)&(lens_x_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc x_gpu: " );
+    cudasafe(cudaMalloc( (void**)&(lens_y_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc y_gpu: " );
+    cudasafe(cudaMalloc( (void**)&(b0_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc b0_gpu: " );
+    cudasafe(cudaMalloc( (void**)&(angle_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc angle_gpu: " );
+    cudasafe(cudaMalloc( (void**)&(epot_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc epot_gpu: " );
+    cudasafe(cudaMalloc( (void**)&(rcore_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc rcore_gpu: " );
+    cudasafe(cudaMalloc( (void**)&(rcut_gpu[dev]), Nlens[1]*sizeof(double)),"Gradientgpu.cu : Alloc rcut_gpu: " );
+    cudasafe(cudaMalloc( (void**)&(frame_gpu[dev]), sizeof(grid_param)),"Gradientgpu.cu : Alloc frame_gpu: " );
+    cudasafe(cudaMalloc( (void**)&(grid_grad_x_gpu[dev]), Ndevice[dev] *sizeof(double)),"Gradientgpu.cu : Alloc source_x_gpu: " );
+    cudasafe(cudaMalloc( (void**)&(grid_grad_y_gpu[dev]), Ndevice[dev] *sizeof(double)),"Gradientgpu.cu : Alloc source_y_gpu: " );
+
+
+    cudaStreamCreate(&stream[dev]);
+
+  }
+
+  for (int dev = 0; dev < nDevices; dev++) {
+
+
+    cudaSetDevice(dev);
+
+    // Copy values to the GPU
+    cudasafe(cudaMemcpyAsync(lens_x_gpu[dev],lens[1].position_x , Nlens[1]*sizeof(double),cudaMemcpyHostToDevice,stream[dev] ),"Gradientgpu.cu : Copy x_gpu: " );
+    cudasafe(cudaMemcpyAsync(lens_y_gpu[dev],lens[1].position_y , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice,stream[dev]),"Gradientgpu.cu : Copy y_gpu: " );
+    cudasafe(cudaMemcpyAsync(b0_gpu[dev],lens[1].b0 , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice,stream[dev]),"Gradientgpu.cu : Copy b0_gpu: " );
+    cudasafe(cudaMemcpyAsync(angle_gpu[dev],lens[1].ellipticity_angle , Nlens[1]*sizeof(double), cudaMemcpyHostToDevice,stream[dev]),"Gradientgpu.cu : Copy angle_gpu: " );
+    cudasafe(cudaMemcpyAsync(epot_gpu[dev], lens[1].ellipticity_potential, Nlens[1]*sizeof(double),cudaMemcpyHostToDevice ,stream[dev]),"Gradientgpu.cu : Copy epot_gpu: " );
+    cudasafe(cudaMemcpyAsync(rcore_gpu[dev], lens[1].rcore, Nlens[1]*sizeof(double),cudaMemcpyHostToDevice ,stream[dev]),"Gradientgpu.cu : Copy rcore_gpu: " );
+    cudasafe(cudaMemcpyAsync(rcut_gpu[dev], lens[1].rcut, Nlens[1]*sizeof(double), cudaMemcpyHostToDevice,stream[dev]),"Gradientgpu.cu : Copy rcut_gpu: " );
+    cudasafe(cudaMemcpyAsync(frame_gpu[dev], frame, sizeof(grid_param), cudaMemcpyHostToDevice,stream[dev]),"Gradientgpu.cu : Copy fame_gpu: " );
+  }
+
+  for (int dev = 0; dev < nDevices; dev++) {
+
+
+    cudaSetDevice(dev);
+    int nBlocks_gpu = 0;
+    cudaDeviceProp properties_gpu;
+    cudaGetDeviceProperties(&properties_gpu, dev); // Get properties of 0th GPU in use
+
+    if (properties_gpu.maxThreadsDim[0]<threadsPerBlock)
+      {
+      fprintf(stderr, "ERROR: The GPU has to support at least %u threads per block.\n", threadsPerBlock);
+      exit(-1);
+      }
+
+
+    if (int((nbgridcells) * (nbgridcells)/threadsPerBlock) == 0){
+      gradient_grid_piemd_GPU_multiple<<<1,threadsPerBlock,0,stream[dev]>>>(grid_grad_x_gpu[dev], grid_grad_y_gpu[dev],frame_gpu[dev],Nlens[1], nbgridcells,Ndevice[dev],indexactual[dev], lens_x_gpu[dev], lens_y_gpu[dev], b0_gpu[dev], angle_gpu[dev], epot_gpu[dev], rcore_gpu[dev], rcut_gpu[dev]);
+    }
+    else{
+      gradient_grid_piemd_GPU_multiple<<<(nbgridcells) * (nbgridcells)/threadsPerBlock,threadsPerBlock,0,stream[dev]>>>(grid_grad_x_gpu[dev], grid_grad_y_gpu[dev],frame_gpu[dev],Nlens[1], nbgridcells, Ndevice[dev],indexactual[dev], lens_x_gpu[dev], lens_y_gpu[dev], b0_gpu[dev], angle_gpu[dev], epot_gpu[dev], rcore_gpu[dev], rcut_gpu[dev]);
+    }
+  }
+
+  for (int dev = 0; dev < nDevices; dev++) {
+    cudasafe(cudaMemcpyAsync( grid_grad_x + indexactual[dev], grid_grad_x_gpu[dev], Ndevice[dev] *sizeof(double),cudaMemcpyDeviceToHost ,stream[dev]),"Gradientgpu.cu : Copy source_x_gpu: " );
+    cudasafe(cudaMemcpyAsync( grid_grad_y + indexactual[dev], grid_grad_y_gpu[dev], Ndevice[dev] *sizeof(double), cudaMemcpyDeviceToHost,stream[dev]),"Gradientgpu.cu : Copy source_y_gpu: " );
+  }
+
+  for (int dev = 0; dev < nDevices; dev++) {
+
+
+    cudaSetDevice(dev);
+      // Free GPU memory
+    cudaFree(lens_x_gpu[dev]);
+    cudaFree(lens_y_gpu[dev]);
+    cudaFree(b0_gpu[dev]);
+    cudaFree(angle_gpu[dev]);
+    cudaFree(epot_gpu[dev]);
+    cudaFree(rcore_gpu[dev]);
+    cudaFree(rcut_gpu[dev]);
+    cudaFree(grid_grad_x_gpu[dev]);
+    cudaFree(grid_grad_y_gpu[dev]);
+    cudaStreamDestroy(stream[dev]);
+
+  }
+/*
+  for (int i = 0; i < nbgridcells; i++){
+    for(int j = 0; j < nbgridcells; j++){
+      printf(" %f",grid_grad_x[i*nbgridcells + j]);
+    }
+    printf("\n");
+  }*/
+
+
+
+}
+
+__global__ void gradient_grid_kernel(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcells, const struct Potential_SOA *lens) {
+
+  //*grad_x = *grad_y = 0.;
+
+  int bid=blockIdx.x; // index of the block (and of the set of images)
+  int tid=threadIdx.x; // index of the thread within the block
+
+  double dx,dy;        //pixelsize
+  int grid_dim, index;
+  struct point image_point, Grad;
+  dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
+  dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
+  grid_dim = (nbgridcells);
+
+  index = bid * threadsPerBlock + tid ;
+
+  while(index < grid_dim*grid_dim){
+
+    grid_grad_x[index] = 0.;
+    grid_grad_y[index] = 0.;
+
+    image_point.x = frame->xmin + (index/grid_dim)*dx;
+    image_point.y = frame->ymin + (index % grid_dim)*dy;
+
+    Grad = module_potentialDerivatives_totalGradient_SOA_GPU(&image_point, lens, Nlens);
+
+    grid_grad_x[index] = Grad.x;
+    grid_grad_y[index] = Grad.y;
+
+    bid += gridDim.x;
+    index = bid * threadsPerBlock + tid;
+  }
+
+
+}
+
+
+__global__ void gradient_grid_sis_GPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcells, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut) {
+
+  //*grad_x = *grad_y = 0.;
+
+  int bid=blockIdx.x; // index of the block (and of the set of images)
+  int tid=threadIdx.x; // index of the thread within the block
+
+  double dx,dy,x_pos,y_pos;        //pixelsize
+  int grid_dim, index;
+  struct point true_coord, true_coord_rotation;
+  double      R;
+  dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
+  dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
+  grid_dim = (nbgridcells);
+
+  index = bid * threadsPerBlock + tid ;
+
+
+
+  while(index < grid_dim*grid_dim){
+
+    grid_grad_x[index] = 0.;
+    grid_grad_y[index] = 0.;
+
+    x_pos= frame->xmin + (index/grid_dim)*dx;
+    y_pos= frame->ymin + (index % grid_dim)*dy;
+    //printf("%f %f \n", x_pos, y_pos);
+
+    for(int iterator=0; iterator < Nlens; iterator++){
+
+      true_coord.x = x_pos - lens_x[iterator];
+      true_coord.y = y_pos - lens_y[iterator];
+
+      // Change the origin of the coordinate system to the center of the clump
+      true_coord_rotation = rotateCoordinateSystem_GPU(true_coord, lens_angle[iterator]);
+
+      R=sqrt(true_coord_rotation.x*true_coord_rotation.x*(1-lens_epot[iterator])+true_coord_rotation.y*true_coord_rotation.y*(1+lens_epot[iterator]));  //ellippot = ellipmass/3
+      grid_grad_x[index] += (1-lens_epot[iterator])*lens_b0[iterator]*true_coord_rotation.x/(R);
+      grid_grad_y[index] += (1+lens_epot[iterator])*lens_b0[iterator]*true_coord_rotation.y/(R);
+
+
+
+    }
+
+
+
+
+    bid += gridDim.x;
+    index = bid * threadsPerBlock + tid;
+  }
+
+
+}
+
+__global__ void gradient_grid_piemd_GPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcells, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut) {
+
+  //*grad_x = *grad_y = 0.;
+
+  int bid=blockIdx.x; // index of the block (and of the set of images)
+  int tid=threadIdx.x; // index of the thread within the block
+
+  double dx,dy,x_pos,y_pos;        //pixelsize
+  int grid_dim, index;
+  struct point true_coord, true_coord_rotation;
+  complex      zis;
+  dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
+  dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
+  grid_dim = (nbgridcells);
+
+  index = bid * threadsPerBlock + tid ;
+
+
+
+  while(index < grid_dim*grid_dim){
+
+    //grid_grad_x[index] = 0.;
+    //grid_grad_y[index] = 0.;
+
+    x_pos= frame->xmin + (index/grid_dim)*dx;
+    y_pos= frame->ymin + (index % grid_dim)*dy;
+    //printf("%f %f \n", x_pos, y_pos);
+
+    for(int iterator=0; iterator < Nlens; iterator++){
+
+      true_coord.x = x_pos - lens_x[iterator];
+      true_coord.y = y_pos - lens_y[iterator];
+
+      // Change the origin of the coordinate system to the center of the clump
+      true_coord_rotation = rotateCoordinateSystem_GPU(true_coord, lens_angle[iterator]);
+
+      double x   = true_coord_rotation.x;
+      double y   = true_coord_rotation.y;
+      double eps = lens_epot[iterator];
+      double rc  = lens_rcore[iterator];
+
+      double sqe  = sqrt(eps);
+      //
+      double cx1  = (1. - eps)/(1. + eps);
+      double cxro = (1. + eps)*(1. + eps);
+      double cyro = (1. - eps)*(1. - eps);
+      //
+      double rem2 = x*x/cxro + y*y/cyro;
+
+      complex zci, znum, zden, zres;
+      double norm;
+      //
+      zci.re  = 0;
+      zci.im  = -0.5*(1. - eps*eps)/sqe;
+      //
+      znum.re = cx1*x;
+      znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1;
+      //
+      zden.re = x;
+      zden.im = 2.*rc*sqe - y;
+      norm    = (zden.re*zden.re + zden.im*zden.im);     // zis = znum/zden
+      //
+      zis.re  = (znum.re*zden.re + znum.im*zden.im)/norm;
+      zis.im  = (znum.im*zden.re - znum.re*zden.im)/norm;
+      norm    = zis.re;
+      zis.re  = log(sqrt(norm*norm + zis.im*zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
+      zis.im  = atan2(zis.im, norm);
+      //  norm = zis.re;
+      zres.re = zci.re*zis.re - zci.im*zis.im;   // Re( zci*ln(zis) )
+      zres.im = zci.im*zis.re + zis.im*zci.re;   // Im( zci*ln(zis) )
+      //
+      zis.re  = zres.re;
+      zis.im  = zres.im;
+
+      grid_grad_x[index] += lens_b0[iterator]*zis.re;
+      grid_grad_y[index] += lens_b0[iterator]*zis.im;
+
+
+
+    }
+
+
+
+
+    bid += gridDim.x;
+    index = bid * threadsPerBlock + tid;
+  }
+
+
+}
+
+__global__ void gradient_grid_piemd_GPU_multiple(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcells, int Ndevice, int indexactual, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut) {
+
+  //*grad_x = *grad_y = 0.;
+
+  int bid=blockIdx.x; // index of the block (and of the set of images)
+  int tid=threadIdx.x; // index of the thread within the block
+
+  double dx,dy,x_pos,y_pos;        //pixelsize
+  int grid_dim, index;
+  struct point true_coord, true_coord_rotation;
+  complex      zis;
+  dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
+  dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
+  grid_dim = (nbgridcells);
+
+  index = bid * threadsPerBlock + tid ;
+
+
+
+  while(index < Ndevice){
+    //printf("%d \n", index);
+
+    grid_grad_x[index] = 0.;
+    grid_grad_y[index] = 0.;
+
+    x_pos= frame->xmin + ((indexactual +index)/grid_dim)*dx;
+    y_pos= frame->ymin + ((indexactual +index) % grid_dim)*dy;
+    //printf("%f %f \n", x_pos, y_pos);
+
+    for(int iterator=0; iterator < Nlens; iterator++){
+
+      true_coord.x = x_pos - lens_x[iterator];
+      true_coord.y = y_pos - lens_y[iterator];
+
+      // Change the origin of the coordinate system to the center of the clump
+      true_coord_rotation = rotateCoordinateSystem_GPU(true_coord, lens_angle[iterator]);
+
+      double x   = true_coord_rotation.x;
+      double y   = true_coord_rotation.y;
+      double eps = lens_epot[iterator];
+      double rc  = lens_rcore[iterator];
+
+      double sqe  = sqrt(eps);
+      //
+      double cx1  = (1. - eps)/(1. + eps);
+      double cxro = (1. + eps)*(1. + eps);
+      double cyro = (1. - eps)*(1. - eps);
+      //
+      double rem2 = x*x/cxro + y*y/cyro;
+
+      complex zci, znum, zden, zres;
+      double norm;
+      //
+      zci.re  = 0;
+      zci.im  = -0.5*(1. - eps*eps)/sqe;
+      //
+      znum.re = cx1*x;
+      znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1;
+      //
+      zden.re = x;
+      zden.im = 2.*rc*sqe - y;
+      norm    = (zden.re*zden.re + zden.im*zden.im);     // zis = znum/zden
+      //
+      zis.re  = (znum.re*zden.re + znum.im*zden.im)/norm;
+      zis.im  = (znum.im*zden.re - znum.re*zden.im)/norm;
+      norm    = zis.re;
+      zis.re  = log(sqrt(norm*norm + zis.im*zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
+      zis.im  = atan2(zis.im, norm);
+      //  norm = zis.re;
+      zres.re = zci.re*zis.re - zci.im*zis.im;   // Re( zci*ln(zis) )
+      zres.im = zci.im*zis.re + zis.im*zci.re;   // Im( zci*ln(zis) )
+      //
+      zis.re  = zres.re;
+      zis.im  = zres.im;
+
+      grid_grad_x[index] += lens_b0[iterator]*zis.re;
+      grid_grad_y[index] += lens_b0[iterator]*zis.im;
+
+
+
+    }
+
+
+
+
+    bid += gridDim.x;
+
+    index = bid * threadsPerBlock + tid;
+  }
+
+
+}
+
+
+
+__global__ void piemd_GPU(double *grad_x, double *grad_y, point *pImage, int Nlens, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut) {
+
+  //*grad_x = *grad_y = 0.;
+
+  int bid=blockIdx.x; // index of the block (and of the set of images)
+  int tid=threadIdx.x; // index of the thread within the block
+
+  struct point true_coord, true_coord_rotation;
+  complex      zis;
+  //gridDim.x , threadsPerBlock;
+
+  int iterator = bid * threadsPerBlock + tid ;
+  //for(int iterator = 0; iterator < Nlens; ++iterator){
+  while(iterator < Nlens){
+    //printf(" %d %d %d %d \n",iterator , bid , threadsPerBlock , tid );
+    true_coord.x = pImage->x - lens_x[iterator];
+    true_coord.y = pImage->y - lens_y[iterator];
+
+    // Change the origin of the coordinate system to the center of the clump
+    true_coord_rotation = rotateCoordinateSystem_GPU(true_coord, lens_angle[iterator]);
+
+    double x   = true_coord_rotation.x;
+    double y   = true_coord_rotation.y;
+    double eps = lens_epot[iterator];
+    double rc  = lens_rcore[iterator];
+
+    double sqe  = sqrt(eps);
+    //
+    double cx1  = (1. - eps)/(1. + eps);
+    double cxro = (1. + eps)*(1. + eps);
+    double cyro = (1. - eps)*(1. - eps);
+    //
+    double rem2 = x*x/cxro + y*y/cyro;
+
+    complex zci, znum, zden, zres;
+    double norm;
+    //
+    zci.re  = 0;
+    zci.im  = -0.5*(1. - eps*eps)/sqe;
+    //
+    znum.re = cx1*x;
+    znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1;
+    //
+    zden.re = x;
+    zden.im = 2.*rc*sqe - y;
+    norm    = (zden.re*zden.re + zden.im*zden.im);     // zis = znum/zden
+    //
+    zis.re  = (znum.re*zden.re + znum.im*zden.im)/norm;
+    zis.im  = (znum.im*zden.re - znum.re*zden.im)/norm;
+    norm    = zis.re;
+    zis.re  = log(sqrt(norm*norm + zis.im*zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
+    zis.im  = atan2(zis.im, norm);
+    //  norm = zis.re;
+    zres.re = zci.re*zis.re - zci.im*zis.im;   // Re( zci*ln(zis) )
+    zres.im = zci.im*zis.re + zis.im*zci.re;   // Im( zci*ln(zis) )
+    //
+    zis.re  = zres.re;
+    zis.im  = zres.im;
+    //
+    //zres.re = zis.re*b0;
+    //zres.im = zis.im*b0;
+    //
+    //
+
+    //grad->x += lens_b0[iterator]*zis.re;
+    //grad->y += lens_b0[iterator]*zis.im;
+    atomicAdd_double(grad_x,lens_b0[iterator]*zis.re);
+    atomicAdd_double(grad_y,lens_b0[iterator]*zis.im);
+
+    //printf("%f %f \n ", lens_b0[iterator]*zis.re, lens_b0[iterator]*zis.im);
+    //printf("%f %f \n ", *grad_x, *grad_y);
+
+    bid += gridDim.x;
+    iterator = bid * threadsPerBlock + tid ;
+
+  }
+
+
+}
+
+__device__ static double atomicAdd_double(double* address, double val)
+{
+    unsigned long long int* address_as_ull =
+                                          (unsigned long long int*)address;
+    unsigned long long int old = *address_as_ull, assumed;
+    do {
+        assumed = old;
+        old = atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(val +
+                        __longlong_as_double(assumed)));
+    } while (assumed != old);
+    return __longlong_as_double(old);
+}
+
diff --git a/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cuh b/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cuh
new file mode 100644
index 0000000..1f136fa
--- /dev/null
+++ b/Benchmarks/GridGradientBenchmark/grid_gradient_GPU.cuh
@@ -0,0 +1,27 @@
+/*
+ * gradientgpu.cuh
+ *
+ *  Created on: Nov 29, 2016
+ *      Author: cerschae
+ */
+
+#ifndef GRID_GRADIENT_GPU_CUH_
+#define GRID_GRADIENT_GPU_CUH_
+
+#include "cudafunctions.cuh"
+#include "gradient_GPU.cuh"
+#include <structure_hpc.h>
+
+void gradient_grid_GPU_sorted(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int Nlens,int nbgridcells);
+void gradient_grid_pinned(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int *Nlens,int nbgridcell);
+void gradient_grid_pinned_multiple(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int *Nlens,int nbgridcell);
+
+__global__ void gradient_grid_kernel(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcells, const struct Potential_SOA *lens);
+__global__ void gradient_grid_piemd_GPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcell, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut);
+__global__ void gradient_grid_sis_GPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcell, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut);
+__global__ void gradient_grid_piemd_GPU_multiple(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens,int nbgridcell, int Ndevice, int indexactual, double * lens_x, double *lens_y, double * lens_b0, double *lens_angle, double *lens_epot, double *lens_rcore, double *lens_rcut);
+
+//__device__ struct point rotateCoordinateSystem_GPU(struct point P, double theta);
+__device__ static double atomicAdd_double(double* address, double val);
+
+#endif /* GRADIENTGPU_CUH_ */
diff --git a/Benchmarks/GridGradientBenchmark/main.cpp b/Benchmarks/GridGradientBenchmark/main.cpp
index 2977ffc..10f0476 100644
--- a/Benchmarks/GridGradientBenchmark/main.cpp
+++ b/Benchmarks/GridGradientBenchmark/main.cpp
@@ -1,254 +1,271 @@
-	/**
-	* @file   main.cpp
-	* @Author Christoph Schaaefer, EPFL (christophernstrerne.schaefer@epfl.ch)
-	* @date   October 2016
-	* @brief  Benchmark for gradhalo function
-	*/
-
-	#include <iostream>
-	#include <iomanip>
-	#include <string.h>
-	#include <math.h>
-	#include <sys/time.h>
-	#include <fstream>
-	#include <sys/stat.h>
-	//
-	#include <mm_malloc.h>
-	//
-	#include <structure_hpc.h>
-	#include "timer.h"
-	#include "gradient.hpp"
-	#include "chi_CPU.hpp"
-	#include "module_cosmodistances.h"
-	#include "module_readParameters.hpp"
-	#include<omp.h>
-
-	int module_readCheckInput_readInput(int argc, char *argv[])
-	{
-	/// check if there is a correct number of arguments, and store the name of the input file in infile
-
-	char* infile;
-	struct stat  file_stat;
-
-	// If we do not have 3 arguments, stop
-	if ( argc != 3 )
-	{
-		fprintf(stderr, "\nUnexpected number of arguments\n");
-		fprintf(stderr, "\nUSAGE:\n");
-		fprintf(stderr, "lenstool  input_file  output_directorypath  [-n]\n\n");
-		exit(-1);
-	}
-	else if ( argc == 3 )
-		infile=argv[1];
-	std::ifstream ifile(infile,std::ifstream::in); // Open the file
-
-		
-	int ts = (int) time (NULL);
-	char buffer[10];
-	std::stringstream ss;
-	ss << ts;
-	std::string trimstamp = ss.str();
-	//
-	std::string outdir = argv[2];
-	outdir += "-";
-	outdir += trimstamp;
-	std::cout << outdir << std::endl;
-
-	// check whether the output directory already exists
-	if (stat(outdir.c_str(), &file_stat) < 0){
-		mkdir(outdir.c_str(), S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH );
-	}
-	else {
-		printf("Error : Directory %s already exists. Specify a non existing directory.\n",argv[2]);
-		exit(-1);
-	}
-
-	// check whether the input file exists. If it could not be opened (ifile = 0), it does not exist
-	if(ifile){
-	ifile.close();
-	}
-	else{
-	printf("The file %s does not exist, please specify a valid file name\n",infile);
+/**
+* @file   main.cpp
+* @Author Christoph Schaaefer, EPFL (christophernstrerne.schaefer@epfl.ch)
+* @date   October 2016
+* @brief  Benchmark for gradhalo function
+*/
+
+#include <iostream>
+#include <iomanip>
+#include <string.h>
+#include <math.h>
+#include <sys/time.h>
+#include <fstream>
+#include <sys/stat.h>
+//
+#include <mm_malloc.h>
+//
+#include <cuda_runtime.h>
+#include <structure_hpc.h>
+#include "timer.h"
+#include "gradient.hpp"
+#include "chi_CPU.hpp"
+#include "module_cosmodistances.h"
+#include "module_readParameters.hpp"
+#include "grid_gradient_GPU.cuh"
+#include<omp.h>
+
+int module_readCheckInput_readInput(int argc, char *argv[])
+{
+/// check if there is a correct number of arguments, and store the name of the input file in infile
+
+char* infile;
+struct stat  file_stat;
+
+// If we do not have 3 arguments, stop
+if ( argc != 3 )
+{
+	fprintf(stderr, "\nUnexpected number of arguments\n");
+	fprintf(stderr, "\nUSAGE:\n");
+	fprintf(stderr, "lenstool  input_file  output_directorypath  [-n]\n\n");
+	exit(-1);
+}
+else if ( argc == 3 )
+	infile=argv[1];
+std::ifstream ifile(infile,std::ifstream::in); // Open the file
+
+
+int ts = (int) time (NULL);
+char buffer[10];
+std::stringstream ss;
+ss << ts;
+std::string trimstamp = ss.str();
+//
+std::string outdir = argv[2];
+outdir += "-";
+outdir += trimstamp;
+std::cout << outdir << std::endl;
+
+// check whether the output directory already exists
+if (stat(outdir.c_str(), &file_stat) < 0){
+	mkdir(outdir.c_str(), S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH );
+}
+else {
+	printf("Error : Directory %s already exists. Specify a non existing directory.\n",argv[2]);
 	exit(-1);
-		}
+}
 
-	return 0;
+// check whether the input file exists. If it could not be opened (ifile = 0), it does not exist
+if(ifile){
+ifile.close();
+}
+else{
+printf("The file %s does not exist, please specify a valid file name\n",infile);
+exit(-1);
 	}
 
-	int main(int argc, char *argv[])
-	{
-
-
-	// Setting Up the problem
-	//===========================================================================================================
-
-	// This module function reads the terminal input when calling LENSTOOL and checks that it is correct
-	// Otherwise it exits LENSTOOL
-	module_readCheckInput_readInput(argc, argv);
-
-	// This module function reads the cosmology parameters from the parameter file
-	// Input: struct cosmologicalparameters cosmology, parameter file
-	// Output: Initialized cosmology struct
-	cosmo_param cosmology;  // Cosmology struct to store the cosmology data from the file
-	std::string inputFile = argv[1];   // Input file
-	module_readParameters_readCosmology(inputFile, cosmology);
-
-	// This module function reads the runmode paragraph and the number of sources, arclets, etc. in the parameter file.
-	// The runmode_param stores the information of what exactly the user wants to do with lenstool.
-	struct runmode_param runmode;
-	module_readParameters_readRunmode(inputFile, &runmode);
-
-	module_readParameters_debug_cosmology(runmode.debug, cosmology);
-	module_readParameters_debug_runmode(runmode.debug, runmode);
-
-
-	//=== Declaring variables
-	struct grid_param frame;
-	struct galaxy images[runmode.nimagestot];
-	struct galaxy sources[runmode.nsets];
-	struct Potential lenses[runmode.nhalos+runmode.npotfile-1];
-	struct Potential_SOA lenses_SOA_table[NTYPES];
-	struct Potential_SOA lenses_SOA;
-	struct cline_param cline;
-	struct potfile_param potfile;
-	struct Potential potfilepotentials[runmode.npotfile];
-	struct potentialoptimization host_potentialoptimization[runmode.nhalos];
-	int nImagesSet[runmode.nsets]; // Contains the number of images in each set of images
-
-	// This module function reads in the potential form and its parameters (e.g. NFW)
-	// Input: input file
-	// Output: Potentials and its parameters
-
-
-	module_readParameters_Potential(inputFile, lenses, runmode.nhalos);
-	//Converts to SOA
-	//module_readParameters_PotentialSOA(inputFile, lenses, lenses_SOA, runmode.Nlens);
-	module_readParameters_PotentialSOA(inputFile, lenses, &lenses_SOA, runmode.nhalos);
-	//module_readParameters_PotentialSOA_nonsorted(inputFile, lenses, &lenses_SOA_nonsorted, runmode.nhalos);
-	module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos);
-	//std::cerr << lenses_SOA[1].b0[0] << " " << lenses[0].b0  << std::endl;
-	// This module function reads in the potfiles parameters
-	// Input: input file
-	// Output: Potentials from potfiles and its parameters
-
-	if (runmode.potfile == 1 ){
-		module_readParameters_readpotfiles_param(inputFile, &potfile);
-		module_readParameters_debug_potfileparam(runmode.debug, &potfile);
-		module_readParameters_readpotfiles(&runmode,&potfile,lenses);
-		module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos+runmode.npotfile);
+return 0;
+}
 
-	}
+int main(int argc, char *argv[])
+{
+
+
+// Setting Up the problem
+//===========================================================================================================
+
+// This module function reads the terminal input when calling LENSTOOL and checks that it is correct
+// Otherwise it exits LENSTOOL
+module_readCheckInput_readInput(argc, argv);
+
+// This module function reads the cosmology parameters from the parameter file
+// Input: struct cosmologicalparameters cosmology, parameter file
+// Output: Initialized cosmology struct
+cosmo_param cosmology;  // Cosmology struct to store the cosmology data from the file
+std::string inputFile = argv[1];   // Input file
+module_readParameters_readCosmology(inputFile, cosmology);
+
+// This module function reads the runmode paragraph and the number of sources, arclets, etc. in the parameter file.
+// The runmode_param stores the information of what exactly the user wants to do with lenstool.
+struct runmode_param runmode;
+module_readParameters_readRunmode(inputFile, &runmode);
+
+module_readParameters_debug_cosmology(runmode.debug, cosmology);
+module_readParameters_debug_runmode(runmode.debug, runmode);
+
+
+//=== Declaring variables
+struct grid_param frame;
+struct galaxy images[runmode.nimagestot];
+struct galaxy sources[runmode.nsets];
+struct Potential lenses[runmode.nhalos+runmode.npotfile-1];
+struct Potential_SOA lenses_SOA_table[NTYPES];
+struct Potential_SOA lenses_SOA;
+struct cline_param cline;
+struct potfile_param potfile;
+struct Potential potfilepotentials[runmode.npotfile];
+struct potentialoptimization host_potentialoptimization[runmode.nhalos];
+int nImagesSet[runmode.nsets]; // Contains the number of images in each set of images
+
+// This module function reads in the potential form and its parameters (e.g. NFW)
+// Input: input file
+// Output: Potentials and its parameters
+
+
+module_readParameters_Potential(inputFile, lenses, runmode.nhalos);
+//Converts to SOA
+//module_readParameters_PotentialSOA(inputFile, lenses, lenses_SOA, runmode.Nlens);
+module_readParameters_PotentialSOA(inputFile, lenses, &lenses_SOA, runmode.nhalos);
+//module_readParameters_PotentialSOA_nonsorted(inputFile, lenses, &lenses_SOA_nonsorted, runmode.nhalos);
+module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos);
+//std::cerr << lenses_SOA[1].b0[0] << " " << lenses[0].b0  << std::endl;
+// This module function reads in the potfiles parameters
+// Input: input file
+// Output: Potentials from potfiles and its parameters
+
+if (runmode.potfile == 1 ){
+	module_readParameters_readpotfiles_param(inputFile, &potfile);
+	module_readParameters_debug_potfileparam(runmode.debug, &potfile);
+	module_readParameters_readpotfiles(&runmode,&potfile,lenses);
+	module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos+runmode.npotfile);
 
+}
 
 
-	// This module function reads in the grid form and its parameters
-	// Input: input file
-	// Output: grid and its parameters
 
+// This module function reads in the grid form and its parameters
+// Input: input file
+// Output: grid and its parameters
 
-	module_readParameters_Grid(inputFile, &frame);
 
+module_readParameters_Grid(inputFile, &frame);
 
 
 
-	if (runmode.image == 1 or runmode.inverse == 1 or runmode.time > 0){
 
-		// This module function reads in the strong lensing images
-		module_readParameters_readImages(&runmode, images, nImagesSet);
-		//runmode.nsets = runmode.nimagestot;
-		for(int i = 0; i < runmode.nimagestot; ++i){
+if (runmode.image == 1 or runmode.inverse == 1 or runmode.time > 0){
 
-			images[i].dls = module_cosmodistances_objectObject(lenses[0].z, images[i].redshift, cosmology);
-			images[i].dos = module_cosmodistances_observerObject(images[i].redshift, cosmology);
-			images[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, images[i].redshift, cosmology);
+	// This module function reads in the strong lensing images
+	module_readParameters_readImages(&runmode, images, nImagesSet);
+	//runmode.nsets = runmode.nimagestot;
+	for(int i = 0; i < runmode.nimagestot; ++i){
 
-		}
-		module_readParameters_debug_image(runmode.debug, images, nImagesSet,runmode.nsets);
+		images[i].dls = module_cosmodistances_objectObject(lenses[0].z, images[i].redshift, cosmology);
+		images[i].dos = module_cosmodistances_observerObject(images[i].redshift, cosmology);
+		images[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, images[i].redshift, cosmology);
 
 	}
+	module_readParameters_debug_image(runmode.debug, images, nImagesSet,runmode.nsets);
 
-	if (runmode.inverse == 1){
-
-		// This module function reads in the potential optimisation limits
-		module_readParameters_limit(inputFile,host_potentialoptimization,runmode.nhalos);
-		module_readParameters_debug_limit(runmode.debug, host_potentialoptimization[0]);
-	}
+}
 
+if (runmode.inverse == 1){
 
-	if (runmode.source == 1){
-		//Initialisation to default values.(Setting sources to z = 1.5 default value)
-		for(int i = 0; i < runmode.nsets; ++i){
-			sources[i].redshift = 1.5;
-		}
-		// This module function reads in the strong lensing sources
-		module_readParameters_readSources(&runmode, sources);
-		//Calculating cosmoratios
-		for(int i = 0; i < runmode.nsets; ++i){
+	// This module function reads in the potential optimisation limits
+	module_readParameters_limit(inputFile,host_potentialoptimization,runmode.nhalos);
+	module_readParameters_debug_limit(runmode.debug, host_potentialoptimization[0]);
+}
 
-			sources[i].dls = module_cosmodistances_objectObject(lenses[0].z, sources[i].redshift, cosmology);
-			sources[i].dos = module_cosmodistances_observerObject(sources[i].redshift, cosmology);
-			sources[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, sources[i].redshift, cosmology);
-		}
-		module_readParameters_debug_source(runmode.debug, sources, runmode.nsets);
 
+if (runmode.source == 1){
+	//Initialisation to default values.(Setting sources to z = 1.5 default value)
+	for(int i = 0; i < runmode.nsets; ++i){
+		sources[i].redshift = 1.5;
+	}
+	// This module function reads in the strong lensing sources
+	module_readParameters_readSources(&runmode, sources);
+	//Calculating cosmoratios
+	for(int i = 0; i < runmode.nsets; ++i){
+
+		sources[i].dls = module_cosmodistances_objectObject(lenses[0].z, sources[i].redshift, cosmology);
+		sources[i].dos = module_cosmodistances_observerObject(sources[i].redshift, cosmology);
+		sources[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, sources[i].redshift, cosmology);
 	}
+	module_readParameters_debug_source(runmode.debug, sources, runmode.nsets);
 
-	std::cout << "--------------------------" << std::endl << std::endl;
+}
+
+std::cout << "--------------------------" << std::endl << std::endl;
+
+double t_1(0),t_2(0),t_3(0),t_4(0);
 
-	double t_1(0),t_2(0),t_3(0),t_4(0);
+// Lenstool-CPU Grid-Gradient
+//===========================================================================================================
 
-	// Lenstool-CPU Grid-Gradient
-	//===========================================================================================================
+//Setting Test:
+double dx, dy;
 
-	//Setting Test:
-	double dx, dy;
+dx = (frame.xmax - frame.xmin)/(runmode.nbgridcells-1);
+dy = (frame.ymax - frame.ymin)/(runmode.nbgridcells-1);
 
-	dx = (frame.xmax - frame.xmin)/(runmode.nbgridcells-1);
-	dy = (frame.ymax - frame.ymin)/(runmode.nbgridcells-1);
+point test_point1_1,test_point2_2, test_result1_1,test_result2_2;
+double dlsds = images[0].dr;
 
-	point test_point1_1,test_point2_2, test_result1_1,test_result2_2;
-	double dlsds = images[0].dr;
+test_point1_1.x = frame.xmin;
+test_point1_1.y = frame.ymin;
+test_point2_2.x = frame.xmin+dx;
+test_point2_2.y = frame.ymin+dy;
 
-	test_point1_1.x = frame.xmin;
-	test_point1_1.y = frame.ymin;
-	test_point2_2.x = frame.xmin+dx;
-	test_point2_2.y = frame.ymin+dy;
+test_result1_1 = module_potentialDerivatives_totalGradient_SOA(&test_point1_1, &lenses_SOA, runmode.nhalos);
+test_result2_2 = module_potentialDerivatives_totalGradient_SOA(&test_point2_2, &lenses_SOA, runmode.nhalos);
 
-	test_result1_1 = module_potentialDerivatives_totalGradient_SOA(&test_point1_1, &lenses_SOA, runmode.nhalos);
-	test_result2_2 = module_potentialDerivatives_totalGradient_SOA(&test_point2_2, &lenses_SOA, runmode.nhalos);
+std::cout << " Initial Test " << std::endl;
+std::cout << " Test 1: " << std::endl;
+std::cout << " Point 1 : " << std::setprecision(5) << test_point1_1.x << " "<< test_point1_1.y <<  std::endl;
+std::cout << " Gradient  " << std::setprecision(5) << test_result1_1.x << " "<< test_result1_1.y <<  std::endl;
+std::cout << " Test 2: " << std::endl;
+std::cout << " Point 2 : " << std::setprecision(5) << test_point2_2.x << " "<< test_point2_2.y <<  std::endl;
+std::cout << " Gradient  " << std::setprecision(5) << test_result2_2.x << " "<< test_result2_2.y <<  std::endl;
 
-	std::cout << " Initial Test " << std::endl;
-	std::cout << " Test 1: " << std::endl;
-	std::cout << " Point 1 : " << std::setprecision(5) << test_point1_1.x << " "<< test_point1_1.y <<  std::endl;
-	std::cout << " Gradient  " << std::setprecision(5) << test_result1_1.x << " "<< test_result1_1.y <<  std::endl;
-	std::cout << " Test 2: " << std::endl;
-	std::cout << " Point 2 : " << std::setprecision(5) << test_point2_2.x << " "<< test_point2_2.y <<  std::endl;
-	std::cout << " Gradient  " << std::setprecision(5) << test_result2_2.x << " "<< test_result2_2.y <<  std::endl;
 
+double *grid_gradient_x, *grid_gradient_y;
 
-	double *grid_gradient_x, *grid_gradient_y;
+grid_gradient_x = (double *)malloc((int) (runmode.nbgridcells) * (runmode.nbgridcells) * sizeof(double));
+grid_gradient_y = (double *)malloc((int) (runmode.nbgridcells) * (runmode.nbgridcells) * sizeof(double));
+int grid_dim = runmode.nbgridcells;
 
-	grid_gradient_x = (double *)malloc((int) (runmode.nbgridcells) * (runmode.nbgridcells) * sizeof(double));
-	grid_gradient_y = (double *)malloc((int) (runmode.nbgridcells) * (runmode.nbgridcells) * sizeof(double));
-	int grid_dim = runmode.nbgridcells;
 
+#if 1
+t_2 = -myseconds();
+//Packaging the image to sourceplane conversion
+gradient_grid_CPU(grid_gradient_x,grid_gradient_y,&frame,&lenses_SOA,runmode.nhalos,grid_dim);
+t_2 += myseconds();
+
+std::cout << " gradient_grid_CPU Brute Force Benchmark " << std::endl;
+std::cout << " Test 1: " << std::endl;
+std::cout << " Point 1 : " << std::setprecision(5) << test_point1_1.x << " "<< test_point1_1.y <<  std::endl;
+std::cout << " Gradient  " << std::setprecision(5) << test_result1_1.x << " "<< test_result1_1.y <<  std::endl;
+std::cout << " Test 2: " << std::endl;
+std::cout << " Point 2 : " << std::setprecision(5) << test_point2_2.x << " "<< test_point2_2.y <<  std::endl;
+std::cout << " Gradient  " << std::setprecision(5) << test_result2_2.x << " "<< test_result2_2.y <<  std::endl;
+std::cout << " Time  " << std::setprecision(15) << t_2 << std::endl;
+#endif
 
 #if 1
-	t_2 = -myseconds();
-	//Packaging the image to sourceplane conversion
-	gradient_grid_CPU(grid_gradient_x,grid_gradient_y,&frame,&lenses_SOA,runmode.nhalos,grid_dim);
-	t_2 += myseconds();
-
-	std::cout << " gradient_grid_CPU Brute Force Benchmark " << std::endl;
-	std::cout << " Test 1: " << std::endl;
-	std::cout << " Point 1 : " << std::setprecision(5) << test_point1_1.x << " "<< test_point1_1.y <<  std::endl;
-	std::cout << " Gradient  " << std::setprecision(5) << test_result1_1.x << " "<< test_result1_1.y <<  std::endl;
-	std::cout << " Test 2: " << std::endl;
-	std::cout << " Point 2 : " << std::setprecision(5) << test_point2_2.x << " "<< test_point2_2.y <<  std::endl;
-	std::cout << " Gradient  " << std::setprecision(5) << test_result2_2.x << " "<< test_result2_2.y <<  std::endl;
-	std::cout << " Time  " << std::setprecision(15) << t_2 << std::endl;
+t_2 = -myseconds();
+gradient_grid_GPU_sorted(grid_gradient_x,grid_gradient_y,&frame,&lenses_SOA,runmode.nhalos,grid_dim);
+t_2 += myseconds();
+
+std::cout << " gradient_grid_GPU_sorted Brute Force Benchmark " << std::endl;
+std::cout << " Test 1: " << std::endl;
+std::cout << " Point 1 : " << std::setprecision(5) << test_point1_1.x << " "<< test_point1_1.y <<  std::endl;
+std::cout << " Gradient  " << std::setprecision(5) << test_result1_1.x << " "<< test_result1_1.y <<  std::endl;
+std::cout << " Test 2: " << std::endl;
+std::cout << " Point 2 : " << std::setprecision(5) << test_point2_2.x << " "<< test_point2_2.y <<  std::endl;
+std::cout << " Gradient  " << std::setprecision(5) << test_result2_2.x << " "<< test_result2_2.y <<  std::endl;
+std::cout << " Time  " << std::setprecision(15) << t_2 << std::endl;
 #endif
 
 
 
 }