diff --git a/Benchmarks/Scripts/GridGradientBenchmark.sh b/Benchmarks/Scripts/GridGradientBenchmark.sh
index 7f0c42b..4c9041b 100755
--- a/Benchmarks/Scripts/GridGradientBenchmark.sh
+++ b/Benchmarks/Scripts/GridGradientBenchmark.sh
@@ -1,30 +1,30 @@
 #!/bin/bash
 echo "Starting Mapping Test"
 
 #if recompiling lenstool is needed
 #cd ../Libs/lenstool-6.8.1/
 #make
 #cd ../../lenstool-hpc
 
 cd ../../src/
 make clean
 make 
 
 cd ../Benchmarks/GridGradientBenchmark
-make -f Makefile.GPU clean
-make -f Makefile.GPU
-rm -r tmp
+make -f Makefile.intel clean
+make -f Makefile.intel
+//rm -r tmp
 
 #./Bayesmap_GPU m1931.par T
 #./GridGradient_GPU ../ConfigFiles/TestResultParameter.par T
 ./GridGradient_GPU ../ConfigFiles/90Pot81.par T
 #./ChiBenchmark_GPU ../ConfigFiles/MarkusBenchmark.par T
 #./ChiBenchmark_GPU ../ConfigFiles/MarkusBenchmark1SIS.par T
 
 #cd ../Benchmarks/GradientBenchmark
 #make -f Makefile clean
 #make -f MakefileDouble
 #./GradientBenchmark
 
 echo "Finish Double test"
 
diff --git a/src/gradient.cpp b/src/gradient.cpp
index f2cd63f..6a6a3e3 100644
--- a/src/gradient.cpp
+++ b/src/gradient.cpp
@@ -1,964 +1,964 @@
 #include <iostream>
 #include <iomanip>
 #include <string.h>
 //#include <cuda_runtime.h>
 #include <math.h>
 #include <sys/time.h>
 #include <fstream>
 #include <immintrin.h>
 #include <map>
 /*
 #ifdef __AVX__
 #include "simd_math_avx.h"
 #endif
 #ifdef __AVX512F__
 #include "simd_math_avx512f.h"
 #endif
 */
 #include "structure_hpc.hpp"
 #include "gradient.hpp"
 #include "utils.hpp"
 //#include "iacaMarks.h"
 //
 //
 //
 /**@brief Return the gradient of the projected lens potential for one clump
  * !!! You have to multiply by dlsds to obtain the true gradient
  * for the expressions, see the papers : JP Kneib & P Natarajan, Cluster Lenses, The Astronomy and Astrophysics Review (2011) for 1 and 2
  * and JP Kneib PhD (1993) for 3
  * 
  * @param pImage 	point where the result is computed in the lens plane
  * @param lens		mass distribution
  */
 
 
 //
 /// Useful functions
 //
 complex
 piemd_1derivatives_ci05(type_t x, type_t y, type_t eps, type_t rc)
 {
         type_t  sqe, cx1, cxro, cyro, rem2;
         complex zci, znum, zden, zis, zres;
         type_t norm;
         //
         //std::cout << "piemd_lderivatives" << std::endl;
         sqe  = sqrt(eps);
         cx1  = ((type_t) 1. - eps) / ((type_t) 1. + eps);
         cxro = ((type_t) 1. + eps) * ((type_t) 1. + eps);
         cyro = ((type_t) 1. - eps) * ((type_t) 1. - eps);
         //
         rem2 = x * x / cxro + y * y / cyro;
         /*zci=cpx(0.,-0.5*(1.-eps*eps)/sqe);
           znum=cpx(cx1*x,(2.*sqe*sqrt(rc*rc+rem2)-y/cx1));
           zden=cpx(x,(2.*rc*sqe-y));
           zis=pcpx(zci,lncpx(dcpx(znum,zden)));
           zres=pcpxflt(zis,b0);*/
 
         // --> optimized code
         zci.re  = (type_t)  0.;
         zci.im  = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe;
         znum.re = cx1 * x;
         znum.im = (type_t) 2.*sqe*sqrt(rc*rc + rem2) - y/cx1;
         zden.re = x;
         zden.im = (type_t) 2.*rc * sqe - y;
         norm = zden.re * zden.re + zden.im * zden.im;     // zis = znum/zden
         zis.re = (znum.re * zden.re + znum.im * zden.im) / norm;
         zis.im = (znum.im * zden.re - znum.re * zden.im) / norm;
         norm = zis.re;
         zis.re = log(sqrt(norm * norm + zis.im * zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
         zis.im = atan2(zis.im, norm);
         //  norm = zis.re;
         zres.re = zci.re * zis.re - zci.im * zis.im;   // Re( zci*ln(zis) )
         zres.im = zci.im * zis.re + zis.im * zci.re;   // Im( zci*ln(zis) )
         //
         //zres.re = zis.re*b0;
         //zres.im = zis.im*b0;
         //
         return(zres);
 }
 //
 //// changes the coordinates of point P into a new basis (rotation of angle theta)
 ////      y'      y    x'
 ////       *      |   /
 ////         *    |  /  theta
 ////           *  | /
 ////             *|--------->x
 /*
 inline
 struct point rotateCoordinateSystem(struct point P, double theta)
 {
 	struct  point   Q;
 
 	Q.x = P.x*cos(theta) + P.y*sin(theta);
 	Q.y = P.y*cos(theta) - P.x*sin(theta);
 
 	return(Q);
 }
 */
 //
 //
 struct point
 grad_halo(const struct point *pImage, const struct Potential *lens)
 {
 	struct point true_coord, true_coord_rot, result;
 	type_t X, Y, R, angular_deviation, u;
 	complex zis;
 	//
 	result.x = result.y = (type_t) 0.;
 	//
 	/*positionning at the potential center*/
 	// Change the origin of the coordinate system to the center of the clump
 	true_coord.x   = pImage->x - lens->position.x;  
 	true_coord.y   = pImage->y - lens->position.y;
 	//printf("x, y = %f, %f\n", lens->position.x, lens->position.y);
 	//
 	true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle);
 	//
 	switch (lens->type)
 	{
 		case(5): /*Elliptical Isothermal Sphere*/
 			/*rotation of the coordiante axes to match the potential axes*/
 			//true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle);
 			//
 			R=sqrt(true_coord_rot.x*true_coord_rot.x*((type_t) 1. - lens->ellipticity_potential) + true_coord_rot.y*true_coord_rot.y*((type_t) 1. + lens->ellipticity_potential));	//ellippot = ellipmass/3
 			result.x = ((type_t) 1. - lens->ellipticity_potential)*lens->b0*true_coord_rot.x/(R);
 			result.y = ((type_t) 1. + lens->ellipticity_potential)*lens->b0*true_coord_rot.y/(R);
 			break;
 		case(8): /* PIEMD */
 			/*rotation of the coordiante axes to match the potential axes*/
 			/*Doing something....*/
 			complex zis = piemd_1derivatives_ci05(true_coord_rot.x, true_coord_rot.y, lens->ellipticity_potential, lens->rcore);
 			//
 			result.x = lens->b0*zis.re;
 			result.y = lens->b0*zis.im;
 			break;
 		case(81): //PIEMD Kassiola & Kovner,1993 I0.5c-I0.5cut
 			type_t t05;
 			if ( lens->ellipticity_potential > (type_t) 2E-4 )
 			{
 				//printf("1 ");
 				t05 = lens->b0*lens->rcut/(lens->rcut - lens->rcore);
 				complex zis     = piemd_1derivatives_ci05(true_coord_rot.x, true_coord_rot.y, lens->ellipticity_potential, lens->rcore);
 				complex zis_cut = piemd_1derivatives_ci05(true_coord_rot.x, true_coord_rot.y, lens->ellipticity_potential, lens->rcut);
 				result.x = t05 * (zis.re - zis_cut.re);
 				result.y = t05 * (zis.im - zis_cut.im);
 				//printf("	g   = %f %f\n", result.x, result.y);
 			}
 			else if (( u = true_coord_rot.x*true_coord_rot.x + true_coord_rot.y*true_coord_rot.y) > (type_t) 0. )
 			{
 				//printf("2 ");
 				// Circular dPIE Elliasdottir 2007 Eq A23 slighly modified for t05
 				X = lens->rcore;
 				Y = lens->rcut;
 				t05  = sqrt(u + X * X) - X - sqrt(u + Y * Y) + Y;  // Faster and equiv to Elliasdottir (see Golse PhD)
 				t05 *= lens->b0 * Y / (Y - X) / u; // 1/u because t05/sqrt(u) and normalised Q/sqrt(u)
 				result.x = t05*true_coord_rot.x;
 				result.y = t05*true_coord_rot.y;
 			}
 			else
 			{
 				//printf("3 ");
 				result.x = (type_t) 0.;
 				result.y = (type_t) 0.;
 			}
 			break;
 
 
 		default:
 			std::cout << "ERROR: Grad 1 profil type of clump "<< lens->name << " unknown : "<< lens->type << std::endl;
 			break;
 	};
 	result = rotateCoordinateSystem(result, -lens->ellipticity_angle); 
 	//printf("	rot grad = %.15f %.15f\n", result.x, result.y); 
 	return result;
 }
 //
 //
 //
 struct point module_potentialDerivatives_totalGradient(const int nhalos, const struct point* pImage, const struct Potential* lens)
 {
 	struct point grad, clumpgrad;
 	//
 	grad.x = (type_t) 0.;
 	grad.y = (type_t) 0.;
 	//
 	for(int i = 0; i < nhalos; i++)
 	{
 		clumpgrad = grad_halo(pImage, &lens[i]);  //compute gradient for each clump separately
 		//std::cout << clumpgrad.x << " " << clumpgrad.y << std::endl;
 		//nan check
 		//if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y)
 		{ 
 			// add the gradients
 			grad.x += clumpgrad.x;
 			grad.y += clumpgrad.y;
 		}  
 	}
 	//	
 	return(grad);
 }
 //
 // SOA versions, vectorizable
 //
 struct point module_potentialDerivatives_totalGradient_5_SOA(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
 {
         asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA begins");
         //printf("# module_potentialDerivatives_totalGradient_SIS_SOA begins\n");
 	//
 	struct point grad, result;
         grad.x = (type_t) 0.;
         grad.y = (type_t) 0.;
 	for(int i = shalos; i < shalos + nhalos; i++)
 	{
 		//
 		struct point true_coord, true_coord_rotation;
 		//
 		true_coord.x = pImage->x - lens->position_x[i];
 		true_coord.y = pImage->y - lens->position_y[i];
 		//
 		true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
 		type_t ell_pot = lens->ellipticity_potential[i];
                 //
 		type_t R = sqrt(true_coord_rotation.x*true_coord_rotation.x*((type_t) 1. - ell_pot) + true_coord_rotation.y*true_coord_rotation.y*((type_t) 1. + ell_pot));
 		//
 		result.x = ((type_t) 1. - ell_pot)*lens->b0[i]*true_coord_rotation.x/R;
 		result.y = ((type_t) 1. + ell_pot)*lens->b0[i]*true_coord_rotation.y/R;
 		//
 		result = rotateCoordinateSystem(result, -lens->ellipticity_angle[i]);
 		//
 		grad.x += result.x;
 		grad.y += result.y;
 		
 	}
 	return grad;
 }
 //
 //
 //
 struct point module_potentialDerivatives_totalGradient_5_SOA_v2(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
 {
         asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins");
         //printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n");
         //
         struct point grad, result;
         grad.x = (type_t) 0.;
         grad.y = (type_t) 0.;
         for(int i = shalos; i < shalos + nhalos; i++)
         {
 			struct point true_coord, true_coord_rotation;
 			//
 			true_coord.x = pImage->x - lens->position_x[i];
 			true_coord.y = pImage->y - lens->position_y[i];
 			//
 			//true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
 			type_t b0 = lens->b0[i];
 			//
 			type_t cose = lens->anglecos[i];
 			type_t sine = lens->anglesin[i];
 			//
 			type_t x = true_coord.x*cose + true_coord.y*sine;
 			type_t y = true_coord.y*cose - true_coord.x*sine;
 			//
 			type_t ell_pot = lens->ellipticity_potential[i];
 			//
 			type_t val = x*x*((type_t) 1. - ell_pot) + y*y*((type_t) 1. + ell_pot);
 			type_t R   = 1./sqrtf(val);
 			R = R*(1.5 - 0.5*val*R*R); 
 			result.x = ((type_t) 1. - ell_pot)*b0*x*R;
 			result.y = ((type_t) 1. + ell_pot)*b0*y*R;
 			//
 			grad.x += result.x*cose - result.y*sine;
 			grad.y += result.y*cose + result.x*sine;
 
 			//grad.x = x/R;
 			//grad.y = y/R;
         }
         return grad;
 }
 
 struct point module_potentialDerivatives_totalGradient_5_SOA_print(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos, int index)
 {
         asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins");
         //printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n");
         //
         struct point grad, result;
     	std::ofstream myfile;
         grad.x = (type_t) 0.;
         grad.y = (type_t) 0.;
 
 #ifdef _double
         std::string name = "Double_";
 #else
         std::string name = "Float_";
 #endif
 
         for(int i = shalos; i < shalos + nhalos; i++)
         {
 		//
 			struct point true_coord, true_coord_rotation;
 			//
 			true_coord.x = pImage->x - lens->position_x[i];
 			true_coord.y = pImage->y - lens->position_y[i];
 			////
 			/*
         	myfile.open (name + "pImage->x_1.txt", std::ios_base::app);
         	myfile << index << " " << pImage->x << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
-        	//*
+        	//
         	myfile.open (name + "pImage->y_1.txt", std::ios_base::app);
         	myfile << index << " " << pImage->y << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
         	//
-			/*
+
         	myfile.open (name + "true_coord.x_2.txt", std::ios_base::app);
         	myfile << index << " " << true_coord.x << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
         	//
         	myfile.open (name + "true_coord.y_2.txt", std::ios_base::app);
         	myfile << index << " " << true_coord.y << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
 			*///
 			type_t cose = lens->anglecos[i];
 			type_t sine = lens->anglesin[i];
 			////
 
         	myfile.open (name + "lens->anglecos[i]_3.txt", std::ios_base::app);
         	myfile << index << " " << lens->anglecos[i] << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
         	//
         	myfile.open (name + "lens->anglesin[i]_3.txt", std::ios_base::app);
         	myfile << index << " " << lens->anglesin[i] << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
 			////
 			type_t x = true_coord.x*cose + true_coord.y*sine;
 			type_t y = true_coord.y*cose - true_coord.x*sine;
 			////
 			/*
         	myfile.open (name + "x_4.txt", std::ios_base::app);
         	myfile << index << " " << x << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
-        	/*
+        	/
         	myfile.open (name + "y_4.txt", std::ios_base::app);
         	myfile << index << " " << y << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
         	*////
 			type_t ell_pot = lens->ellipticity_potential[i];
 			//
 			type_t R = sqrt(x*x*(1 - ell_pot) + y*y*(1 + ell_pot));
 			//
 			result.x = (1 - ell_pot)*lens->b0[i]*x/R;
 			result.y = (1 + ell_pot)*lens->b0[i]*y/R;
 			////
 			/*
         	myfile.open (name + "ell_pot_5.txt", std::ios_base::app);
         	myfile << index << " " << ell_pot << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
         	//
         	myfile.open (name + "R_5.txt", std::ios_base::app);
         	myfile << index << " " << R << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
         	myfile.open (name + "x_6.txt", std::ios_base::app);
         	myfile << index << " " << x << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
-        	/*
+
         	myfile.open (name + "y_6.txt", std::ios_base::app);
         	myfile << index << " " << y << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
         	*///
 			grad.x += result.x*cose - result.y*sine;
 			grad.y += result.y*cose + result.x*sine;
 			////
 			/*
         	myfile.open (name + "grad.x_7.txt", std::ios_base::app);
         	myfile << index << " " << grad.x << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
-        	/*
+
         	myfile.open (name + "grad.y_7.txt", std::ios_base::app);
         	myfile << index << " " << grad.y << std::setprecision(7)  << " " << std::endl;
         	myfile.close();
 			*////
         }
         return grad;
 }
 //
 //
 //
 struct point module_potentialDerivatives_totalGradient_5_SOA_v2_novec(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
 {
         asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA begins");
         //
         struct point grad, result;
         grad.x = (type_t) 0.;
         grad.y = (type_t) 0.;
-#pragma novec
+#pragma novector
 	for(int i = shalos; i < shalos + nhalos; i++)
         {
                 //
                 struct point true_coord, true_coord_rotation;
                 //
                 true_coord.x = pImage->x - lens->position_x[i];
                 true_coord.y = pImage->y - lens->position_y[i];
                 //
                 //true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
                 type_t cose = lens->anglecos[i];
                 type_t sine = lens->anglesin[i];
                 //
                 type_t x = true_coord.x*cose + true_coord.y*sine;
                 type_t y = true_coord.y*cose - true_coord.x*sine;
                 //:
                 type_t R = sqrt(x*x*((type_t) 1. - lens->ellipticity[i]/(type_t) 3.) + y*y*((type_t) 1. + lens->ellipticity[i]/(type_t) 3.));
 
                 result.x = ((type_t) 1. - lens->ellipticity[i]/(type_t) 3.)*lens->b0[i]*x/R;
                 result.y = ((type_t) 1. + lens->ellipticity[i]/(type_t) 3.)*lens->b0[i]*y/R;
                 //
                 grad.x += result.x*cose - result.y*sine;
                 grad.y += result.y*cose + result.x*sine;
         }
         return grad;
 }
 //
 //
 //
 struct point module_potentialDerivatives_totalGradient_8_SOA(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
 {
 	asm volatile("# module_potentialDerivatives_totalGradient_SOA begins");
 	// 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
 	// 
 	struct point grad, clumpgrad;
 	grad.x = (type_t) 0.;
 	grad.y = (type_t) 0.;
 	//
 	for(int i = shalos; i < shalos + nhalos; i++)
 	{
 		//IACA_START;
 		//
 		struct point true_coord, true_coord_rot; //, result;
 		//type_t       R, angular_deviation;
 		complex      zis;
 		//
 		//result.x = result.y = 0.;
 		//
 		//@@printf("image_x = %f image_y = %f\n",  pImage->x, pImage->y);	
 		true_coord.x = pImage->x - lens->position_x[i];
 		true_coord.y = pImage->y - lens->position_y[i];
 		//printf("x = %f y = %f\n",  true_coord.x, true_coord.y);	
 		/*positionning at the potential center*/
 		// Change the origin of the coordinate system to the center of the clump
 		true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
 		//
 		type_t x   = true_coord_rot.x;
 		type_t y   = true_coord_rot.y;
 		//@@printf("x = %f y = %f\n",  x, y);	
 		type_t eps = lens->ellipticity_potential[i];
 		type_t rc  = lens->rcore[i];
 		//
 		//std::cout << "piemd_lderivatives" << std::endl;
 		//
 		type_t sqe  = sqrt(eps);
 		//
 		type_t cx1  = ((type_t) 1. - eps)/((type_t) 1. + eps);
 		type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps);
 		type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps);
 		//
 		type_t rem2 = x*x/cxro + y*y/cyro;
 		//
 		complex zci, znum, zden, zres;
 		type_t norm;
 		//	
 		zci.re  = (type_t) 0.;
 		zci.im  = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe;
 		//@@printf("zci = %f %f\n", zci.re, zci.im);	
 		//
 		znum.re = cx1*x;
 		znum.im = (type_t) 2.*sqe*sqrt(rc*rc + rem2) - y/cx1;
 		//
 		zden.re = x;
 		zden.im = (type_t) 2.*rc*sqe - y;
 		norm    = (zden.re*zden.re + zden.im*zden.im);     // zis = znum/zden
 		//@@printf("norm = %f\n", norm);
 		//
 		zis.re  = (znum.re*zden.re + znum.im*zden.im)/norm;
 		zis.im  = (znum.im*zden.re - znum.re*zden.im)/norm;
 		//@@printf("zis = %f %f\n", zis.re, zis.im);	
 		norm    = zis.re;
 		zis.re  = log(sqrt(norm*norm + zis.im*zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
 		zis.im  = atan2(zis.im, norm);
 		//@@printf("y,x = %f %f\n", zis.im, norm);	
 		//  norm = zis.re;
 		zres.re = zci.re*zis.re - zci.im*zis.im;   // Re( zci*ln(zis) )
 		zres.im = zci.im*zis.re + zis.im*zci.re;   // Im( zci*ln(zis) )
 		//
 		//@@printf("zres: %f %f\n", zres.re, zres.im); 
 		//
 		zis.re  = zres.re;
 		zis.im  = zres.im;
 		//
 		//zres.re = zis.re*b0;
 		//zres.im = zis.im*b0;
 		// rotation
 		clumpgrad.x = zis.re;
 		clumpgrad.y = zis.im;
 		clumpgrad = rotateCoordinateSystem(clumpgrad, -lens->ellipticity_angle[i]);
 		//
 		clumpgrad.x = lens->b0[i]*clumpgrad.x;
 		clumpgrad.y = lens->b0[i]*clumpgrad.y;
 		//
 		//clumpgrad.x = lens->b0[i]*zis.re;
 		//clumpgrad.y = lens->b0[i]*zis.im;
 		//nan check
 		//if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y)
 		//{
 			// add the gradients
 		grad.x += clumpgrad.x;
 		grad.y += clumpgrad.y;
 		//@@printf("grad: %f %f\n", grad.x, grad.y); 
 		//@@std::cout << "grad.x = " << grad.x << " grad.y = " << grad.y << std::endl;
 		//}
 	}
 	//IACA_END;
 	//
 	return(grad);
 }
 //
 //
 //
 struct point module_potentialDerivatives_totalGradient_8_SOA_v2(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
 {
         asm volatile("# module_potentialDerivatives_totalGradient_8_SOA_v2 begins");
         //std::cout << "# module_potentialDerivatives_totalGradient_8_SOA_v2 begins" << std::endl;
 	//
         // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
         //
         struct point grad, clumpgrad;
         grad.x = 0;
         grad.y = 0;
 	//printf("%d %d\n", shalos, nhalos);
         //
         for(int i = shalos; i < shalos + nhalos; i++)
         {
                 //IACA_START;
                 //
                 struct point true_coord, true_coord_rot; //, result;
                 complex      zis;
                 type_t b0   = lens->b0[i];
                 //
                 true_coord.x = pImage->x - lens->position_x[i];
                 true_coord.y = pImage->y - lens->position_y[i];
                 //
                 //true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
                 //
                 type_t cose = lens->anglecos[i];
                 type_t sine = lens->anglesin[i];
 		//
                 type_t x = true_coord.x*cose + true_coord.y*sine;
                 type_t y = true_coord.y*cose - true_coord.x*sine;
                 //
                 type_t eps = lens->ellipticity_potential[i];
                 type_t rc  = lens->rcore[i];
                 //
                 type_t sqe  = sqrt(eps);
                 //
                 type_t cx1  = ((type_t) 1. - eps)/((type_t) 1. + eps);
                 type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps);
                 type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps);
                 //
                 type_t rem2 = x*x/cxro + y*y/cyro;
                 //
                 complex zci, znum, zden, zres;
                 type_t norm;
                 //
                 zci.re  = (type_t) 0.;
                 zci.im  = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe;
                 //
 		KERNEL(rc, zres)
 		//
                 grad.x += b0*(zres.re*cose - zres.im*sine);
                 grad.y += b0*(zres.im*cose + zres.re*sine);
                 //
         }
         //IACA_END;
         //
         return(grad);
 }
 //
 //
 //
 struct point module_potentialDerivatives_totalGradient_8_SOA_v2_novec(const struct point *pImage, const struct Potential_SOA *lens
 , int shalos, int nhalos)
 {
         asm volatile("# module_potentialDerivatives_totalGradient_8_SOA_v2 begins");
         //std::cout << "# module_potentialDerivatives_totalGradient_8_SOA_v2 begins" << std::endl;
         //
         // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
         //
         struct point grad, clumpgrad;
         grad.x = (type_t) 0.;
         grad.y = (type_t) 0.;
         //printf("%d %d\n", shalos, nhalos);
         //
 #pragma novector
         for(int i = shalos; i < shalos + nhalos; i++)
         {
                 //IACA_START;
                 //
                 struct point true_coord, true_coord_rot; //, result;
                 complex      zis;
                 type_t b0   = lens->b0[i];
                 //
                 true_coord.x = pImage->x - lens->position_x[i];
                 true_coord.y = pImage->y - lens->position_y[i];
                 //
                 //true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
                 //
                 type_t cose = lens->anglecos[i];
                 type_t sine = lens->anglesin[i];
                 //
                 type_t x = true_coord.x*cose + true_coord.y*sine;
                 type_t y = true_coord.y*cose - true_coord.x*sine;
                 //
                 type_t eps = lens->ellipticity_potential[i];
                 type_t rc  = lens->rcore[i];
                 //
                 type_t sqe  = sqrt(eps);
                 //
                 type_t cx1  = ((type_t) 1. - eps)/((type_t) 1. + eps);
                 type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps);
                 type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps);
                 //
                 type_t rem2 = x*x/cxro + y*y/cyro;
                 //
                 complex zci, znum, zden, zres;
                 type_t norm;
                 //
                 zci.re  = (type_t) 0.;
                 zci.im  = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe;
                 //
                 KERNEL(rc, zres)
                 //
                 grad.x += b0*(zres.re*cose - zres.im*sine);
                 grad.y += b0*(zres.im*cose + zres.re*sine);
                 //
         }
         //IACA_END;
         //
         return(grad);
 }
 //
 //
 //
 struct point module_potentialDerivatives_totalGradient_81_SOA(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
 {
         asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins");
 	//std::cout << "# module_potentialDerivatives_totalGradient_SOA begins" << std::endl;
         // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
         // 
         struct point grad, clumpgrad;
         grad.x = (type_t) 0.;
         grad.y = (type_t) 0.;
         for(int i = shalos; i < shalos + nhalos; i++)
         {
                 //IACA_START;
                 //
                 struct point true_coord, true_coord_rot; //, result;
                 //type_t       R, angular_deviation;
                 complex      zis;
                 //
                 //result.x = result.y = 0.;
                 //
                 true_coord.x = pImage->x - lens->position_x[i];
                 true_coord.y = pImage->y - lens->position_y[i];
                 /*positionning at the potential center*/
                 // Change the origin of the coordinate system to the center of the clump
                 true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
                 //
                 type_t x    = true_coord_rot.x;
                 type_t y    = true_coord_rot.y;
                 type_t eps  = lens->ellipticity_potential[i];
                 type_t rc   = lens->rcore[i];
                 type_t rcut = lens->rcut[i];
 		type_t b0   = lens->b0[i];
 		type_t t05  = b0*rcut/(rcut - rc);
                 //
                 type_t sqe  = sqrt(eps);
                 //
                 type_t cx1  = ((type_t) 1. - eps)/((type_t) 1. + eps);
                 type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps);
                 type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps);
                 //
                 type_t rem2 = x*x/cxro + y*y/cyro;
                 //
                 complex zci, znum, zden, zres_rc, zres_rcut;
                 type_t norm;
                 //      
                 zci.re  = (type_t) 0.;
                 zci.im  = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe;
 		// step 1
 		{
 			KERNEL(rc, zres_rc)
 		}
 		// step 2
 		{
 			KERNEL(rcut, zres_rcut)
                 }
 		zis.re  = t05*(zres_rc.re - zres_rcut.re);
 		zis.im  = t05*(zres_rc.im - zres_rcut.im); 
                 // rotation
                 clumpgrad.x = zis.re;
                 clumpgrad.y = zis.im;
                 clumpgrad = rotateCoordinateSystem(clumpgrad, -lens->ellipticity_angle[i]);
                 //
                 grad.x += clumpgrad.x;
                 grad.y += clumpgrad.y;
                 //}
         }
         //IACA_END;
         //
         return(grad);
 }
 //
 //
 //
 struct point module_potentialDerivatives_totalGradient_81_SOA_v2(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
 {
         asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins");
         //std::cout << "# module_potentialDerivatives_totalGradient_81_SOA begins" << std::endl;
         // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
         //
         struct point grad, clumpgrad;
         grad.x = (type_t) 0.;
         grad.y = (type_t) 0.;
         for(int i = shalos; i < shalos + nhalos; i++)
         {
                 //IACA_START;
                 //
                 struct point true_coord, true_coord_rot; //, result;
                 //type_t       R, angular_deviation;
                 complex      zis;
                 //
                 //result.x = result.y = 0.;
                 //
                 true_coord.x = pImage->x - lens->position_x[i];
                 true_coord.y = pImage->y - lens->position_y[i];
                 /*positionning at the potential center*/
                 // Change the origin of the coordinate system to the center of the clump
                 type_t cose = lens->anglecos[i];
                 type_t sine = lens->anglesin[i];
                 type_t x = true_coord.x*cose + true_coord.y*sine;
                 type_t y = true_coord.y*cose - true_coord.x*sine;
 		//
                 type_t eps  = lens->ellipticity_potential[i];
                 type_t rc   = lens->rcore[i];
                 type_t rcut = lens->rcut[i];
                 type_t b0   = lens->b0[i];
                 type_t t05  = b0*rcut/(rcut - rc);
                 //
                 type_t sqe  = sqrt(eps);
                 //
                 type_t cx1  = ((type_t) 1. - eps)/((type_t) 1. + eps);
                 type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps);
                 type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps);
                 //
                 type_t rem2 = x*x/cxro + y*y/cyro;
                 //
                 complex zci, znum, zden, zres_rc, zres_rcut;
                 type_t norm;
                 //
                 zci.re  = (type_t) 0.;
                 zci.im  = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe;
 		//
                 // step 1
                 {
 			KERNEL(rc, zres_rc)
                 }
                 // step 2
                 {
 			KERNEL(rcut, zres_rcut)
                 }
                 zis.re  = t05*(zres_rc.re - zres_rcut.re);
                 zis.im  = t05*(zres_rc.im - zres_rcut.im);
                 // rotation
 		grad.x += (zis.re*cose - zis.im*sine);
                 grad.y += (zis.im*cose + zis.re*sine);
                 //
         }
         //
         return(grad);
 }
 //
 //
 //
 struct point module_potentialDerivatives_totalGradient_81_SOA_v2_novec(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
 {
         asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins");
         //std::cout << "# module_potentialDerivatives_totalGradient_81_SOA begins" << std::endl;
         // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
         struct point grad, clumpgrad;
         grad.x = 0.;
         grad.y = 0.;
 #pragma novector
         for(int i = shalos; i < shalos + nhalos; i++)
         {
                 //IACA_START;
                 //
                 struct point true_coord, true_coord_rot; //, result;
                 //type_t       R, angular_deviation;
                 complex      zis;
                 //
                 //result.x = result.y = 0.;
                 //
                 true_coord.x = pImage->x - lens->position_x[i];
                 true_coord.y = pImage->y - lens->position_y[i];
                 /*positionning at the potential center*/
                 // Change the origin of the coordinate system to the center of the clump
                 type_t cose = lens->anglecos[i];
                 type_t sine = lens->anglesin[i];
                 type_t x = true_coord.x*cose + true_coord.y*sine;
                 type_t y = true_coord.y*cose - true_coord.x*sine;
                 //
                 type_t eps  = lens->ellipticity_potential[i];
                 type_t rc   = lens->rcore[i];
                 type_t rcut = lens->rcut[i];
                 type_t b0   = lens->b0[i];
                 type_t t05  = b0*rcut/(rcut - rc);
                 //
                 type_t sqe  = sqrt(eps);
                 //
                 type_t cx1  = ((type_t) 1. - eps)/((type_t) 1. + eps);
                 type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps);
                 type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps);
                 //
                 type_t rem2 = x*x/cxro + y*y/cyro;
                 //
                 complex zci, znum, zden, zres_rc, zres_rcut;
                 type_t norm;
                 //
                 zci.re  = (type_t) 0.;
                 zci.im  = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe;
                 //
                 // step 1
                 {
                         KERNEL(rc, zres_rc)
                 }
                 // step 2
                 {
                         KERNEL(rcut, zres_rcut)
                 }
                 zis.re  = t05*(zres_rc.re - zres_rcut.re);
                 zis.im  = t05*(zres_rc.im - zres_rcut.im);
                 // rotation
                 grad.x += (zis.re*cose - zis.im*sine);
                 grad.y += (zis.im*cose + zis.re*sine);
                 //
         }
         //
         return(grad);
 }
 //
 //
 //
 typedef struct point (*halo_func_t) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos); 
 halo_func_t halo_func[100] = 
 { 
 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_5_SOA_v2, 0, 0, module_potentialDerivatives_totalGradient_8_SOA_v2,  0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0,  module_potentialDerivatives_totalGradient_81_SOA_v2, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 //
 //
 //
 struct point module_potentialDerivatives_totalGradient_SOA(const struct point *pImage, const struct Potential_SOA *lens, int nhalos)
 {
         struct point grad, clumpgrad;
         //
         grad.x = clumpgrad.x = (type_t) 0.;
         grad.y = clumpgrad.y = (type_t) 0.;
 	//
 	int shalos = 0;
 
 	while (shalos < nhalos)
 	{
 		int lens_type = lens->type[shalos];
 		int count     = 1;
 		while (lens->type[shalos + count] == lens_type and shalos + count < nhalos){
 			//int i = count;
 			//std::cerr << lens->position_x[i] << lens->position_y[i] << lens->anglecos[i]<< lens->anglesin[i]<< lens->ellipticity_potential[i] <<  lens->rcore[i] << lens->rcut[i] << lens->b0[i] << std::endl;
 			//std::cerr << "lens->type[shalos + count] = " << lens->type[shalos + count] << " " << lens_type << " " << lens_type << " " << " count = " << count << std::endl;
 			count++;
 		}
 
 		//	
 		clumpgrad = (*halo_func[lens_type])(pImage, lens, shalos, count);
 		//
 		//std::cerr << lens->position_x[i] << lens->position_y[i] << lens->anglecos[i]<< lens->anglesin[i]<< lens->ellipticity_potential[i] <<  lens->rcore[i] << lens->rcut[i] << lens->b0[i] << std::endl;
 		//std::cerr << "type = " << lens_type << " " << count << " " << nhalos << " " << " grad.x = " << clumpgrad.x << " grad.y = " << clumpgrad.y << std::endl;
 		grad.x += clumpgrad.x;
 		grad.y += clumpgrad.y;
 		shalos += count;
 	}	
 
         return(grad);
 }
 //
 typedef struct point (*halo_func_t_novec) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos);
 //
 halo_func_t_novec halo_func_novec[100] =
 {
 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_5_SOA_v2_novec, 0, 0, module_potentialDerivatives_totalGradient_8_SOA_v2_novec,  0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0,  module_potentialDerivatives_totalGradient_81_SOA_v2_novec, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 //
 //
 //
 struct point module_potentialDerivatives_totalGradient_SOA_novec(const struct point *pImage, const struct Potential_SOA *lens, int nhalos)
 {
         struct point grad, clumpgrad;
         //
         grad.x = clumpgrad.x = 0;
         grad.y = clumpgrad.y = 0;
         //
         int shalos = 0;
         //
         //module_potentialDerivatives_totalGradient_81_SOA(pImage, lens, 0, nhalos);
         //return;
         /*
         int* p_type = &(lens->type)[0];
         int* lens_type = (int*) malloc(nhalos*sizeof(int));
         memcpy(lens_type, &(lens->type)[0], nhalos*sizeof(int));
         */
         //quicksort(lens_type, nhalos);
         //
         while (shalos < nhalos)
         {
                 int lens_type = lens->type[shalos];
                 int count     = 1;
                 while (lens->type[shalos + count] == lens_type) count++;
                 //std::cerr << "type = " << lens_type << " " << count << " " << shalos << std::endl;
                 //
                 clumpgrad = (*halo_func_novec[lens_type])(pImage, lens, shalos, count);
                 //
                 grad.x += clumpgrad.x;
                 grad.y += clumpgrad.y;
                 shalos += count;
         }
 
         return(grad);
 }
 
 
 
diff --git a/src/gradient2_GPU.cu b/src/gradient2_GPU.cu
index 38a3147..2e5a8e0 100644
--- a/src/gradient2_GPU.cu
+++ b/src/gradient2_GPU.cu
@@ -1,340 +1,340 @@
 #include <fstream>
 #include "grid_gradient2_GPU.cuh"
 #include "gradient2_GPU.cuh"
 #include "structure_hpc.hpp"
 
 #define BLOCK_SIZE_X 8
 #define BLOCK_SIZE_Y 16
 
 //#define ROT
 
 #define _SHARED_MEM
 
 #ifdef _SHARED_MEM
 #define SHARED __shared__
 #warning "shared memory"
 extern __shared__ type_t shared[];
 #else
 #define SHARED 
 #endif
 
 #define Nx 1
 #define Ny 0
 
 extern "C" {
 double myseconds();
 }
 
 __device__ matrix mdci05_gpu(type_t x, type_t y, type_t eps, type_t rc, type_t b0)
 {
 	matrix res;
     type_t   ci, sqe, cx1, cxro, cyro, wrem;
     type_t  didyre, didyim, didxre;// didxim;
     type_t  cx1inv, den1, num2, den2;
 
     sqe = sqrt(eps);
     cx1 = (1. - eps) / (1. + eps);
     cx1inv = 1. / cx1;
     cxro = (1. + eps) * (1. + eps);     /* rem^2=x^2/(1+e^2) + y^2/(1-e^2) Eq 2.3.6*/
     cyro = (1. - eps) * (1. - eps);
     ci = 0.5 * (1. - eps * eps) / sqe;
     wrem = sqrt(rc * rc + x * x / cxro + y * y / cyro); /*wrem^2=w^2+rem^2 with w core radius*/
     den1 = 2.*sqe * wrem - y * cx1inv;
     den1 = cx1 * cx1 * x * x + den1 * den1;
     num2 = 2.*rc * sqe - y;
     den2 = x * x + num2 * num2;
 
     didxre = ci * ( cx1 * (2.*sqe * x * x / cxro / wrem - 2.*sqe * wrem + y * cx1inv) / den1 + num2 / den2 );
     didyre = ci * ( (2 * sqe * x * y * cx1 / cyro / wrem - x) / den1 + x / den2 );
 
     didyim = ci * ( (2 * sqe * wrem * cx1inv - y * cx1inv * cx1inv - 4 * eps * y / cyro +
                      2 * sqe * y * y / cyro / wrem * cx1inv) / den1 - num2 / den2 );
     //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printf("didxre %f didyre %f didyim %f ",didxre,didyre,didyim);
     res.a = b0 * didxre;
     res.b = res.d = b0 * didyre; //(didyre+didxim)/2.;
     res.c = b0 * didyim;
 
     return(res);
 
 
 }
 
 
 __device__ void printmat_gpu(matrix A){
 	printf("A: %lf, B: %lf, C:%lf, D:%lf \n",A.a,A.b,A.c,A.d);
 }
 
 
 __device__ matrix module_potentialDerivatives_totalGradient2_81_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos){
     //asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins");
 
     //std::cout << "# module_potentialDerivatives_totalGradient_81_SOA begins" << std::endl;
     // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
     //
-    type_t t05, vala,valb,valc,vald;
+    type_t t05;
      struct matrix grad2, clump, clumpcore, clumpcut;
      grad2.a =  0;
      grad2.b =  0;
      grad2.c =  0;
      grad2.d =  0;
 #if 1
 
      for(int i = shalos; i < shalos + nhalos; i++)
      {
 
-		struct point true_coord, true_coord_rotation;
+		struct point true_coord;
 		//True coord
 		true_coord.x = pImage->x - lens->position_x[i];
 		true_coord.y = pImage->y - lens->position_y[i];
 		//
 
 		//std::cerr << "start" << std::endl;
 		//if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printf("start\n");
 		//Rotation
 		type_t cose = lens->anglecos[i];
 		type_t sine = lens->anglesin[i];
 		//
 		type_t x = true_coord.x*cose + true_coord.y*sine;
 		type_t y = true_coord.y*cose - true_coord.x*sine;
 		// 81 comput
 		t05 = lens->rcut[i] / (lens->rcut[i] - lens->rcore[i]);
 
 		//if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0)printf("t05 %f rcut: %f, rcore: %f, b0:%f \n",t05, lens->rcut[i],lens->rcore[i], lens->b0[i]);
 		clumpcore = mdci05_gpu(x, y, lens->ellipticity_potential[i], lens->rcore[i], lens->b0[i]);
 		////////////////////////////
 #if 0
 		//matrix res;
 		type_t rc = lens->rcore[i];
 		type_t eps =lens->ellipticity_potential[i];
 
 	    type_t   ci, sqe, cx1, cxro, cyro, wrem;
 	    type_t  didyre, didyim, didxre;// didxim;
 	    type_t  cx1inv, den1, num2, den2;
 
 	    sqe = sqrt( lens->ellipticity_potential[i]);
 	    cx1 = (1. - eps) / (1. + eps);
 	    cx1inv = 1. / cx1;
 	    cxro = (1. + eps) * (1. + eps);     /* rem^2=x^2/(1+e^2) + y^2/(1-e^2) Eq 2.3.6*/
 	    cyro = (1. - eps) * (1. - eps);
 	    ci = 0.5 * (1. - eps * eps) / sqe;
 
 	    wrem = sqrt(rc * rc + x * x / cxro + y * y / cyro); /*wrem^2=w^2+rem^2 with w core radius*/
 	    den1 = 2.*sqe * wrem - y * cx1inv;
 	    den1 = cx1 * cx1 * x * x + den1 * den1;
 	    num2 = 2.*rc * sqe - y;
 	    den2 = x * x + num2 * num2;
 
 	    didxre = ci * ( cx1 * (2.*sqe * x * x / cxro / wrem - 2.*sqe * wrem + y * cx1inv) / den1 + num2 / den2 );
 	    if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printf("cyro %f ",cyro);
 	    if(cyro == 0) printf("I %d fucked up!",threadIdx.x);
 	    //didyre = ci * ( (2 * sqe * x * y * cx1 / wrem - x) + x  );
 	    didyre = ci * ( (2 * sqe * x * y * cx1 / wrem - x) / den1 + x / den2 );
 	    didyre = didyre /cyro;
 	    //didyre = ci * ( (2 * sqe * x * y * cx1 / cyro / wrem - x) / den1 + x / den2 );
 
 	    didyim = ci * ( (2 * sqe * wrem * cx1inv - y * cx1inv * cx1inv - 4 * eps * y / cyro +
 	                     2 * sqe * y * y / cyro / wrem * cx1inv) / den1 - num2 / den2 );
 
 	    //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printf("didxre %f didyre %f didyim %f ",didxre,didyre,didyim);
 
 	    clumpcore.a = lens->b0[i] * didxre;
 	    clumpcore.b = clumpcore.d = lens->b0[i] * didyre; //(didyre+didxim)/2.;
 	    clumpcore.c = lens->b0[i] * didyim;
 #endif
 	    ////////////////////////////////////////////////
 		clumpcut = mdci05_gpu(x, y, lens->ellipticity_potential[i], lens->rcut[i], lens->b0[i]);
 		/////////////////////////////////////
 #if 0
 		rc = lens->rcut[i];
 	    sqe = sqrt( lens->ellipticity_potential[i]);
 	    cx1 = (1. - eps) / (1. + eps);
 	    cx1inv = 1. / cx1;
 	    cxro = (1. + eps) * (1. + eps);     /* rem^2=x^2/(1+e^2) + y^2/(1-e^2) Eq 2.3.6*/
 	    cyro = (1. - eps) * (1. - eps);
 	    ci = 0.5 * (1. - eps * eps) / sqe;
 	    wrem = sqrt(rc * rc + x * x / cxro + y * y / cyro); /*wrem^2=w^2+rem^2 with w core radius*/
 	    den1 = 2.*sqe * wrem - y * cx1inv;
 	    den1 = cx1 * cx1 * x * x + den1 * den1;
 	    num2 = 2.*rc * sqe - y;
 	    den2 = x * x + num2 * num2;
 
 	    didxre = ci * ( cx1 * (2.*sqe * x * x / cxro / wrem - 2.*sqe * wrem + y * cx1inv) / den1 + num2 / den2 );
 	    didyre = ci * ( (2 * sqe * x * y * cx1 / cyro / wrem - x) / den1 + x / den2 );
 
 	    didyim = ci * ( (2 * sqe * wrem * cx1inv - y * cx1inv * cx1inv - 4 * eps * y / cyro +
 	                     2 * sqe * y * y / cyro / wrem * cx1inv) / den1 - num2 / den2 );
 	    //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printf("didxre %f didyre %f didyim %f ",didxre,didyre,didyim);
 	    clumpcut.a = lens->b0[i] * didxre;
 	    clumpcut.b = clumpcut.d = lens->b0[i] * didyre; //(didyre+didxim)/2.;
 	    clumpcut.c = lens->b0[i] * didyim;
 #endif
 		///////////
 		//if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(clumpcore);
 		//if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(clumpcut);
 		//
 #if 1
 		clumpcore.a = t05 * (clumpcore.a - clumpcut.a);
 		clumpcore.b = t05 * (clumpcore.b - clumpcut.b);
 		clumpcore.c = t05 * (clumpcore.c - clumpcut.c);
 		clumpcore.d = t05 * (clumpcore.d - clumpcut.d);
 		//if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(clumpcore);
 
 		//rotation matrix  1
 		clumpcut.a = clumpcore.a * cose + clumpcore.b * -sine;
 		clumpcut.b = clumpcore.a * sine + clumpcore.b * cose;
 		clumpcut.c = clumpcore.d * sine + clumpcore.c * cose;
 		clumpcut.d = clumpcore.d * cose + clumpcore.c * -sine;
 		//if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(clumpcut);
 		//rotation matrix  2
 		clump.a = cose * clumpcut.a + -sine * clumpcut.d;
 		clump.b = cose * clumpcut.b + -sine * clumpcut.c;
 		clump.c = sine * clumpcut.b + cose * clumpcut.c;
 		clump.d = sine * clumpcut.a + cose * clumpcut.d;
 #endif
 		//if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(clump);
 		//vala += clump.a;
 		//valb += clump.b;
 		//valc += clump.c;
 		//vald += clump.d;
 		grad2.a += clump.a;
 		grad2.b += clump.b;
 		grad2.c += clump.c;
 		grad2.d += clump.d;
 		//if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(grad2);
 
      }
      //
      //grad2.a =  vala;
      //grad2.b =  valb;
      //grad2.c =  valc;
      //grad2.d =  vald;
 #endif
      return(grad2);
 
 }
 #if 1
 typedef struct matrix (*halo_func2_GPU_t) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos);
 
 __constant__ halo_func2_GPU_t halo_func2_GPU[100] =
 {
 	0, 0, 0, 0, 0, 0, 0, 0, 0,  0,
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0,  module_potentialDerivatives_totalGradient2_81_SOA_GPU, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 	   };
 #endif
 __global__
 void
 module_potentialDerivatives_totalGradient2_SOA_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct Potential_SOA *lens, const struct grid_param *frame, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart)
 {
     struct point image_point;
 	struct matrix grad, clumpgrad;
         //
         int col = blockIdx.x*blockDim.x + threadIdx.x;
         int row = blockIdx.y*blockDim.y + threadIdx.y;
         //if(row == 0 && col == 0) printf("Start GPU \n");
 
         //
         if ((row + jstart < nbgridcells_y) && (col + istart < nbgridcells_x))
         {
                 int index = row*nbgridcells_x + col;
                 // Create temp grad variable to minimise writing to global memory grid_grad
         		grad.a = 0;
         		grad.b = 0;
         		grad.c = 0;
         		grad.d = 0;
                 //
                 image_point.x = frame->xmin + (col + istart)*dx;
                 image_point.y = frame->ymin + (row + jstart)*dy;
                 //
                 int shalos = 0;
                 //if(row == 0 && col == 0) printf("Start 2 GPU \n");
                 //if(row == 0 && col == 0) std::cout << std::endl;;
 
 
                 while (shalos < nhalos)
                 {
                         int lens_type = lens->type[shalos];
                         int count     = 1;
                         while (lens->type[shalos + count] == lens_type and shalos + count < nhalos) count++;
                         //
                         //if(row == 0 && col == 0) printf("point = %p \n", halo_func2_GPU[lens_type] );
                         //
                         //clumpgrad = (*halo_func2_GPU[lens_type])(&image_point, lens, shalos, count);
                         clumpgrad = module_potentialDerivatives_totalGradient2_81_SOA_GPU(&image_point, lens, shalos, count);
 
                         //
             			grad.a += clumpgrad.a;
             			grad.b += clumpgrad.b;
             			grad.c += clumpgrad.c;
             			grad.d += clumpgrad.d;
             			shalos += count;
             		}
             		// Write to global memory
             		grid_grad2_a[index] = grad.a;
             		grid_grad2_b[index] = grad.b;
             		grid_grad2_c[index] = grad.c;
             		grid_grad2_d[index] = grad.d;
 
         }
 }
 
 __global__
 void
 module_potentialDerivatives_totalGradient2_SOA_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int nhalos)
 {
     struct point image_point;
 	struct matrix grad, clumpgrad;
 	//
     int col = blockIdx.x*blockDim.x + threadIdx.x;
     int row = blockIdx.y*blockDim.y + threadIdx.y;
     //
     if ((row < nbgridcells) && (col < nbgridcells))
     {
         //
         type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
         type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
 		//
 		int index = row*nbgridcells + col;
 		// Create temp grad variable to minimise writing to global memory grid_grad
 		grad.a = 0;
 		grad.b = 0;
 		grad.c = 0;
 		grad.d = 0;
 		//
 		image_point.x = frame->xmin + col*dx;
 		image_point.y = frame->ymin + row*dy;
 		//
 		int shalos = 0;
 		while (shalos < nhalos)
 		{
 			int lens_type = lens->type[shalos];
 			int count     = 1;
 			while (lens->type[shalos + count] == lens_type and shalos + count < nhalos) count++;
 			//
 			clumpgrad = (*halo_func2_GPU[lens_type])(&image_point, lens, shalos, count);
 			//
 			//clumpgrad = module_potentialDerivatives_totalGradient2_81_SOA_GPU(&image_point, lens, shalos, count);
 			grad.a += clumpgrad.a;
 			grad.b += clumpgrad.b;
 			grad.c += clumpgrad.c;
 			grad.d += clumpgrad.d;
 			shalos += count;
 		}
 		// Write to global memory
 		grid_grad2_a[index] = grad.a;
 		grid_grad2_b[index] = grad.b;
 		grid_grad2_c[index] = grad.c;
 		grid_grad2_d[index] = grad.d;
 	//if ((row == 0) && (col == 9))
 	//printf("%f %f: %f %f\n",  image_point.x, image_point.y, grid_grad_x[index], grid_grad_y[index]);
     }
 
 }
 
diff --git a/src/gradient_GPU.cu b/src/gradient_GPU.cu
index 4f0395d..6b769b5 100644
--- a/src/gradient_GPU.cu
+++ b/src/gradient_GPU.cu
@@ -1,1000 +1,999 @@
 #include <fstream>
 #include "grid_gradient_GPU.cuh"
 #include "gradient.hpp"
 #include "gradient_GPU.cuh"
 #include "gradient.hpp"
 
 #define BLOCK_SIZE_X 8
 #define BLOCK_SIZE_Y 16
 
 //#define ROT
 
 #define _SHARED_MEM
 
 #ifdef _SHARED_MEM
 #define SHARED __shared__
 #warning "shared memory"
 extern __shared__ type_t shared[];
 #else
 #define SHARED 
 #endif
 
 #define Nx 1
 #define Ny 0
 
 extern "C" {
 double myseconds();
 }
 /*
 void
 module_potentialDerivatives_totalGradient_SOA_CPU_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_cpu, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos);
 
 void
 module_potentialDerivatives_totalGradient_SOA_CPU_GPU_v2(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_cpu, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos);
 
 void calculate_cossin_values(type_t *theta_cos, type_t *theta_sin, type_t *angles, int nhalos ){
 	for(int i = 0 ; i < nhalos; i++)
 	{
 		theta_cos[i] = cos(angles[i]);
 		theta_sin[i] = sin(angles[i]);
 	}
 }
 */
 
 
 
-
+#if 0
 
 __global__
 void
 module_potentialDerivatives_totalGradient_8_SOA_GPU_cur(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int shalos, int nhalos)
 {
         //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins");
         // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
         //
-        struct point grad, clumpgrad, image_point;
+        struct point grad, image_point;
 	//
         grad.x = 0;
         grad.y = 0;
         //
         int col = blockIdx.x*blockDim.x + threadIdx.x;
         int row = blockIdx.y*blockDim.y + threadIdx.y;
         //
         if ((row < nbgridcells) && (col < nbgridcells))
         {
                 //
                 int index = row*nbgridcells + col;
                 //
                 //grid_grad_x[index] = 0.;
                 //grid_grad_y[index] = 0.;
                 //
                 type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
                 type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
                 //
                 image_point.x = frame->xmin + col*dx;
                 image_point.y = frame->ymin + row*dy;
 		//
                 for(int i = shalos; i < shalos + nhalos; i++)
                 {
-                        struct point true_coord, true_coord_rot; //, result;
                         //type_t       R, angular_deviation;
                         complex      zis;
                         //
                         // positionning at the potential center
                         // Change the origin of the coordinate system to the center of the clump
                         //
 			//@@if ((row == Ny) && (col == Nx)) printf("image_x = %f, %f image_y = %f, %f\n",  image_point.x, frame->xmin, image_point.y,frame->ymin);
 			type_t true_coord_x = image_point.x - __ldg(&lens->position_x[i]);
                         type_t true_coord_y = image_point.y - __ldg(&lens->position_y[i]);
 			//if ((row == Ny) && (col == Nx)) printf("x = %f y = %f\n",  true_coord_x, true_coord_y);	
 			//
                         type_t cosi = __ldg(&lens->anglecos[i]);
                         type_t sinu = __ldg(&lens->anglesin[i]);
 			//
                         type_t x = true_coord_x*cosi + true_coord_y*sinu;
                         type_t y = true_coord_y*cosi - true_coord_x*sinu;
 			//
 			//if ((row == Ny) && (col == Nx)) printf("x = %f y = %f\n",  x, y);	
                         //
                         type_t eps = __ldg(&lens->ellipticity_potential[i]);
                         //
                         type_t sqe  = sqrt(eps);
                         //
                         type_t rem2 = x*x/((1. + eps)*(1. + eps)) + y*y/((1. - eps)*(1. - eps));
                         //
                         complex zci;
                         complex znum, zden, zres;
                         type_t norm;
                         //
 			zci.re  = 0;
                         zci.im  = -0.5*(1. - eps*eps)/sqe;
 			//@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zci.re, zci.im);
                         //
                         type_t rc  = __ldg(&lens->rcore[i]);
                         type_t cx1  = (1. - eps)/(1. + eps);
                         znum.re = cx1*x;
                         znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1;
                         //
                         zden.re = x;
                         zden.im = 2.*rc*sqe - y;
                         norm    = (zden.re*zden.re + zden.im*zden.im);     // zis = znum/zden
 			//@@if ((col == Nx) && (row == Ny)) printf("norm = %f\n", norm);
                         //
                         zis.re  = (znum.re*zden.re + znum.im*zden.im)/norm;
                         zis.im  = (znum.im*zden.re - znum.re*zden.im)/norm;
 			//
 			//@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zis.re, zis.im);
                         //
                         norm    = zis.re;
                         //
                         zis.re  = log(sqrt(norm*norm + zis.im*zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
                         zis.im  = atan2(zis.im, norm);
 			//
 			//@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zis.re, zis.im);
                         //
                         zres.re = zci.re*zis.re - zci.im*zis.im;   // Re( zci*ln(zis) )
                         zres.im = zci.im*zis.re + zis.im*zci.re;   // Im( zci*ln(zis) )
 			//
 			//@@if ((col == Nx) && (row == Ny)) printf("%d %d, zres: %f %f\n", row, col, zres.re, zres.im);
                         //
                         type_t b0  = __ldg(&lens->b0[i]);
                         grad.x += b0*(zres.re*cosi - zres.im*sinu);
                         grad.y += b0*(zres.im*cosi + zres.re*sinu);
 			//@@if ((col == Nx) && (row == Ny)) printf("grad: %f %f\n", grad.x, grad.y);
                 }
                 //IACA_END;
                 //
                 grid_grad_x[index] = grad.x;
                 grid_grad_y[index] = grad.y;
 		//if ((row == 0) && (col == 9)) 
 		//printf("%f %f: %f %f\n",  image_point.x, image_point.y, grid_grad_x[index], grid_grad_y[index]);
         }
 }
 
 __global__
 void
 module_potentialDerivatives_totalGradient_8_SOA_GPU_SM2(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int shalos, int nhalos)
 {
 	//
         //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins");
         // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
         //
         type_t grad_x, grad_y;
 	type_t clumpgrad_x, clumpgrad_y;
 	type_t image_point_x, image_point_y;
 	//
 	SHARED type_t cosi	[200];
 	SHARED type_t sinu	[200];
 	SHARED type_t rc	[200];
 	SHARED type_t b0	[200];
 	SHARED type_t epsi	[200];
 	SHARED type_t position_x[200];
 	SHARED type_t position_y[200];
 	SHARED type_t rsqe	[200];
 	SHARED type_t sonepeps	[200];
 	SHARED type_t sonemeps	[200];
         //
         grad_x = 0;
         grad_y = 0;
         //
         int col = blockIdx.x*blockDim.x + threadIdx.x;
         int row = blockIdx.y*blockDim.y + threadIdx.y;
 	int ithread  = threadIdx.y*blockDim.x + threadIdx.x;
 	//
 	int index = row*nbgridcells + col;
 	//
 	//grid_grad_x[index] = 0.;
 	//grid_grad_y[index] = 0.;
 	//
 	type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
 	type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
 	//
 	image_point_x = frame->xmin + col*dx;
 	image_point_y = frame->ymin + row*dy;
 	//
 	int i = ithread;
 	if (i < nhalos)
 	{
 		cosi[i]       = __ldg(&lens->anglecos		  [shalos + i]);
 		sinu[i]       = __ldg(&lens->anglesin		  [shalos + i]);
 		position_x[i] = __ldg(&lens->position_x		  [shalos + i]);
 		position_y[i] = __ldg(&lens->position_y		  [shalos + i]);
 		rc[i]         = __ldg(&lens->rcore		  [shalos + i]);
 		b0[i]         = __ldg(&lens->b0		          [shalos + i]);
 		epsi[i]       = __ldg(&lens->ellipticity_potential[shalos + i]);
 		//sonemeps[i]   = 1 - epsi[i];
 		//sonepeps[i]   = 1 + epsi[i];
 		rsqe[i]	      = sqrt(epsi[i]);
 	}
 	__syncthreads();
 	//
 	if ((row < nbgridcells) && (col < nbgridcells))
 	{
 		for(int i = 0; i < nhalos; i++)
 		{
 			//
 			type_t true_coord_x = image_point_x - position_x[i];
 			type_t true_coord_y = image_point_y - position_y[i];
 			//
 			type_t x = true_coord_x*cosi[i] + true_coord_y*sinu[i];
 			type_t y = true_coord_y*cosi[i] - true_coord_x*sinu[i];
 			//
 			type_t eps     = epsi[i];
 			//type_t onemeps = 1 - eps;
 			//type_t onepeps = 1 + eps;
 			//
 			//type_t eps     = epsi[i];
 			type_t onemeps = sonemeps[i];
 			type_t onepeps = sonepeps[i];
 			//
 			//type_t sqe  = sqrt(eps);
 			type_t sqe  = rsqe[i];
 			type_t rem2 = x*x/(onepeps*onepeps) + y*y/(onemeps*onemeps);
 			//
 			complex      zis;
 			//
 			type_t znum_re, znum_im;
 			type_t zres_re, zres_im;
 			type_t norm;
 			type_t zden_re, zden_im;
 			type_t  zis_re,  zis_im;
 			//
 			type_t zci_im  = -0.5*(1. - eps*eps)/sqe;
 			//
 			type_t cx1  = onemeps/onepeps;
 			//
 			znum_re = cx1*x;
 			znum_im = 2.*sqe*sqrt(rc[i]*rc[i] + rem2) - y/cx1;
 			//
 			zden_re = x;
 			zden_im = 2.*rc[i]*sqe - y;
 			//
 			norm    = (x*x + zden_im*zden_im);     // zis = znum/zden
 			zis.re  = (znum_re*x + znum_im*zden_im)/norm;
 			zis.im  = (znum_im*x - znum_re*zden_im)/norm;
 			//
 			norm    = zis.re;
 			//
 			zis.re  = log(sqrt(norm*norm + zis.im*zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
 			zis.im  = atan2(zis.im, norm);
 			//
 			zres_re = - zci_im*zis.im;   // Re( zci*ln(zis) )
 			zres_im =   zci_im*zis.re;   // Im( zci*ln(zis) )
 			//
 			grad_x += b0[i]*(zres_re*cosi[i] - zres_im*sinu[i]);
 			grad_y += b0[i]*(zres_im*cosi[i] + zres_re*sinu[i]);
 		}
 		//
 		grid_grad_x[index] = grad_x;
 		grid_grad_y[index] = grad_y;
 		//__syncthreads();
 	}
 }
 //
 //
 //
 __global__
 void
 module_potentialDerivatives_totalGradient_8_SOA_GPU_SM3(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA
  *lens, const struct grid_param *frame, int nbgridcells, int shalos, int nhalos)
 {
         //
         //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins");
         // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
         //
         type_t grad_x, grad_y;
         type_t clumpgrad_x, clumpgrad_y;
         type_t image_point_x, image_point_y;
         //
         SHARED type_t cosi      [200];
         SHARED type_t sinu      [200];
         SHARED type_t rci       [200];
         SHARED type_t b0        [200];
         SHARED type_t epsi      [200];
         SHARED type_t position_x[200];
         SHARED type_t position_y[200];
         SHARED type_t rsqe      [200];
 	//SHARED type_t sgrad_x   [(BLOCK_SIZE_X + 1)*BLOCK_SIZE_Y];
 	//SHARED type_t sgrad_y   [(BLOCK_SIZE_X + 1)*BLOCK_SIZE_Y];
 
         //SHARED type_t sonepeps  [200];
         //SHARED type_t sonemeps  [200];
         //
         grad_x         = 0;
         grad_y 	       = 0;
         //
         int row        = blockIdx.y*blockDim.y + threadIdx.y;
         int col        = blockIdx.x*blockDim.x + threadIdx.x;
 	//
 	//int loc_row    = threadIdx.x;
 	//int loc_col    = threadIdx.y*blockDim.x + threadIdx.x;
 	//
         //int grid_size  = nbgridcells/blockDim.y; 
 	//
 	//if (threadIdx.x == 0) printf("%d %d %d: row = %d, col = %d, grid_size = %d\n", blockIdx.y, gridDim.y, threadIdx.y, row, col, grid_size);
         //
 	type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
 	type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
 	//if (threadIdx.x == 0) printf("dx = %f, dy = %f\n", dx, dy);
 	//
 	image_point_x = frame->xmin + col*dx;
 	image_point_y = frame->ymin + row*dy;
 	//
 	//int iloc  = threadIdx.x*blockDim.y + threadIdx.y;
 	int iglob = row*nbgridcells + col;
 	int numThreads = blockDim.x*blockDim.y;
 	//
 	for (int i = 0; i < (nhalos + numThreads - 1)/numThreads; ++i)
 	{ 
 		int iloc  = threadIdx.y*blockDim.x + threadIdx.x + i*numThreads;
 		if (iloc < nhalos)
 		{
 			cosi[iloc]       = __ldg(&lens->anglecos             [shalos + iloc]);
 			sinu[iloc]       = __ldg(&lens->anglesin             [shalos + iloc]);
 			position_x[iloc] = __ldg(&lens->position_x           [shalos + iloc]);
 			position_y[iloc] = __ldg(&lens->position_y           [shalos + iloc]);
 			rci[iloc]        = __ldg(&lens->rcore                [shalos + iloc]);
 			b0[iloc]         = __ldg(&lens->b0                   [shalos + iloc]);
 			epsi[iloc]       = __ldg(&lens->ellipticity_potential[shalos + iloc]);
 			rsqe[iloc]       = sqrt(epsi[iloc]);
 		}
 	}
 	__syncthreads();
 	//
 	if ((row < nbgridcells) && (col < nbgridcells))
 	{
 		//
 		for(int i = 0; i < nhalos; i++)
 		{
 			//int index  = iloc; 
 #if 1
 			type_t rc      = rci[i];
 			type_t eps     = epsi[i];
 			type_t onemeps = 1 - eps;
 			type_t onepeps = 1 + eps;
 			//
 			type_t sqe = rsqe[i];
 			type_t cx1 = onemeps/onepeps;
 			//
 			//
 			//type_t zci_im = 1;
 			type_t zci_im  = -0.5*(1. - eps*eps)/sqe;
 			type_t inv_onepeps = 1./(onepeps*onepeps);
 			type_t inv_onemeps = 1./(onemeps*onemeps);
 #endif
 			//
 			{
 				//KERNEL_8;
 				type_t grad_x = grad_y = 0.;
 				type_t true_coord_y = image_point_y - position_y[i];
 				type_t true_coord_x = image_point_x - position_x[i];
 				KERNEL_8_reg(0);
 				grid_grad_x[iglob +  0] += grad_x;
 				grid_grad_y[iglob +  0] += grad_y;
 			}
 			/*
 			{
 				//KERNEL_8;
 				type_t grad_x = grad_y = 0.;
 				type_t true_coord_y = image_point_y - position_y[i];
 				type_t true_coord_x = image_point_x - position_x[i] + BLOCK_SIZE_X/2*dx;
 				KERNEL_8_reg(0);
 				grid_grad_x[iglob + BLOCK_SIZE_X/2] += grad_x;
 				grid_grad_y[iglob + BLOCK_SIZE_X/2] += grad_y;
 			}
 			*/
 		//
 		}
 	}
 }
 //
 //
 //
 __global__
 void
 module_potentialDerivatives_totalGradient_8_SOA_GPU_SM4(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA
 		lens, const struct grid_param *frame, int nbgridcells, int shalos, int nhalos/*, type_t* dtimer*/)
 {
 	//
 	//asm volatile("# module_potentialDerivatives_totalGradient_SOA begins");
 	// 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
 	//
-	type_t grad_x, grad_y;
-	type_t clumpgrad_x, clumpgrad_y;
-	type_t image_point_x, image_point_y;
+	//type_t grad_x, grad_y;
+	//type_t clumpgrad_x, clumpgrad_y;
+	//type_t image_point_x, image_point_y;
 	//
 	//
-	type_t* cosi       = &shared[0*nhalos];
-	type_t* sinu       = &shared[1*nhalos];
-	type_t* rc         = &shared[2*nhalos];
-	type_t* b0         = &shared[3*nhalos];
-	type_t* epsi       = &shared[4*nhalos];
-	type_t* position_x = &shared[5*nhalos];
-	type_t* position_y = &shared[6*nhalos];
-	type_t* rsqe       = &shared[7*nhalos];
+	//type_t* cosi       = &shared[0*nhalos];
+	//type_t* sinu       = &shared[1*nhalos];
+	//type_t* rc         = &shared[2*nhalos];
+	//type_t* b0         = &shared[3*nhalos];
+	//type_t* epsi       = &shared[4*nhalos];
+	//type_t* position_x = &shared[5*nhalos];
+	//type_t* position_y = &shared[6*nhalos];
+	//type_t* rsqe       = &shared[7*nhalos];
 
 	//SHARED type_t sonepeps  [200];
 	//SHARED type_t sonemeps  [200];
 	//
 	grad_x         = 0;
 	grad_y         = 0;
 	//
 	int grid_size  =  nbgridcells/gridDim.y;
 	int row        =  blockIdx.x*blockDim.x + threadIdx.x;
 	int col        = (blockIdx.y*blockDim.y + threadIdx.y)/**grid_size*/;
 	//
 	//
 
 #if 0
 	//SHARED double sgrad_x [
 	if (/*(threadIdx.x == 0) &&*/ (blockIdx.x == 0))
 	{
 		if (threadIdx.x == 0) printf("blockDim.x = %d, blockIdx.x = %d grimdDim.x = %d threadIdx.x = %d\n", blockDim.x, blockIdx.x, gridDim.x, threadIdx.x);
 		if (threadIdx.x == 0) printf("blockDim.y = %d, blockIdx.y = %d grimdDim.y = %d threadIdx.y = %d\n", blockDim.y, blockIdx.y, gridDim.y, threadIdx.y);
 		if (threadIdx.x == 0) printf("row = %d, col = %d, grid_size = %d\n", row, col, grid_size);
 	}
 	__syncthreads();
 #endif
-	type_t* sgrad_x    = &shared[8*nhalos];
+	//type_t* sgrad_x    = &shared[8*nhalos];
 	type_t* sgrad_y    = &shared[8*nhalos + (grid_size + 1)*blockDim.x];
 	//
 	//
 	//grid_grad_x[index] = 0.;
 	//grid_grad_y[index] = 0.;
 	//
 	type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
 	type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
 	//
 	//if (threadIdx.x == 0) printf("dx = %f, dy = %f\n", dx, dy);
 	//
 	image_point_x = frame->xmin + col*dx;
 	image_point_y = frame->ymin + row*dy;
 	return;
 	//
-	int i = 0;
+	//int i = 0;
 #if 0
 	for (; i < nhalos; i = i + blockDim.x)	
 	{
 		int pos = threadIdx.x + i;
 		/*if ((threadIdx.x == 0) && (blockIdx.x == 0))*/ printf("pos = %d\n"); 
 		__syncthreads();
 		//
 		cosi[pos]       = __ldg(&lens->anglecos             [shalos + pos]);
 		sinu[pos]       = __ldg(&lens->anglesin             [shalos + pos]);
 		position_x[pos] = __ldg(&lens->position_x           [shalos + pos]);
 		position_y[pos] = __ldg(&lens->position_y           [shalos + pos]);
 		rc[pos]         = __ldg(&lens->rcore                [shalos + pos]);
 		b0[pos]         = __ldg(&lens->b0                   [shalos + pos]);
 		epsi[pos]       = __ldg(&lens->ellipticity_potential[shalos + pos]);
 		rsqe[pos]       = sqrt(epsi[i]);
 		//
 	}
 #endif
 #if 0
 	if (threadIdx.x == 0)
 		for (; i < nhalos; i += 1)
 		{
 			cosi[i]       = __ldg(&lens->anglecos             [shalos + i]);
 			sinu[i]       = __ldg(&lens->anglesin             [shalos + i]);
 			position_x[i] = __ldg(&lens->position_x           [shalos + i]);
 			position_y[i] = __ldg(&lens->position_y           [shalos + i]);
 			rc[i]         = __ldg(&lens->rcore                [shalos + i]);
 			b0[i]         = __ldg(&lens->b0                   [shalos + i]);
 			epsi[i]       = __ldg(&lens->ellipticity_potential[shalos + i]);
 			rsqe[i]       = sqrt(epsi[i]);
 		}
 #endif
 	__syncthreads();
 	//if ((row == col == 0)) printf("shared mem done...\n");
 	//
 	if (row < nbgridcells)
 	{
 		//for(int icol = 0; icol < grid_size; ++icol){
                 //      if (col + icol < nbgridcells){
                 //grad_x = grad_y = 0.;
 		int  index  = row*nbgridcells + col;
                 //
                 for(int i = 0; i < nhalos; i++)
                 {
                         int sindex  = threadIdx.x*grid_size; 
 #if 0
                         type_t eps     = epsi[i];
                         type_t onemeps = 1 - eps;
                         type_t onepeps = 1 + eps;
                         //
                         type_t sqe = rsqe[i];
                         type_t cx1 = onemeps/onepeps;
                         //
                         //
                         //type_t x = true_coord_y*sinu[i];
                         //type_t y = true_coord_y*cosi[i];
                         type_t true_coord_y = image_point_y - position_y[i];
                         type_t true_coord_x = image_point_x - position_x[i] /*+ icol*dx*/;
                         //
                         complex zci;
                         zci.im  = -0.5*(1. - eps*eps)/sqe;
                         type_t inv_onepeps = 1./(onepeps*onepeps);
                         type_t inv_onemeps = 1./(onemeps*onemeps);
 #endif
                         //
                         for(int icol = 0; icol < grid_size; ++icol)
                         {
                                 if (col + icol < nbgridcells)
                                 {
 #if 0
                                         if ((row == 1) && (col == 1)) printf("%d %d: %f %f\n", row, col, true_coord_x, true_coord_y);
 
                                         true_coord_x = image_point_x - position_x[i] + icol*dx;
                                         //
                                         //x += true_coord_x*cosi[i];
                                         //y -= true_coord_x*sinu[i];
                                         type_t x = true_coord_x*cosi[i] + true_coord_y*sinu[i];
                                         type_t y = true_coord_y*cosi[i] - true_coord_x*sinu[i];
                                         //
                                         //if ((row == 1) && (col == 0)) printf("i = %d, eps = %f\n", i, eps);
                                         //
                                         //double eps     = epsi[i];
                                         //double onemeps = sonemeps[i];
                                         //double onepeps = sonepeps[i];
                                         //
                                         //double sqe  = sqrt(eps);
                                         //double rem2 = x*x/(onepeps*onepeps) + y*y/(onemeps*onemeps);
                                         type_t rem2 = x*x*inv_onepeps + y*y*inv_onemeps;
                                         //
                                         //
                                         //double znum_re, znum_im;
                                         //double zres_re, zres_im;
                                         //double zden_re, zden_im;
                                         //double  zis_re,  zis_im;
                                         type_t norm;
                                         //
                                         complex      zis;
                                         complex      znum;
                                         complex      zden;
                                         complex      zres;
                                         //
                                         //double cx1  = onemeps/onepeps;
                                         //
                                         znum.re = cx1*x;
                                         znum.im = 2.*sqe*sqrt(rc[i]*rc[i] + rem2) - y/cx1;
                                         //
                                         zden.re = x;
                                         zden.im = 2.*rc[i]*sqe - y;
                                         //
                                         norm    = (x*x + zden.im*zden.im);     // zis = znum/zden
                                         zis.re  = (znum.re*x + znum.im*zden.im)/norm;
                                         zis.im  = (znum.im*x - znum.re*zden.im)/norm;
                                         //
                                         norm    = zis.re;
                                         //
                                         zis.re  = log(sqrt(norm*norm + zis.im*zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
                                         zis.im  = atan2(zis.im, norm);
                                         //
                                         zres.re = - zci.im*zis.im;   // Re( zci*ln(zis) )
                                         zres.im =   zci.im*zis.re;   // Im( zci*ln(zis) )
                                         //
                                         grid_grad_x[index] += b0[i]*(zres.re*cosi[i] - zres.im*sinu[i]);
                                         grid_grad_y[index] += b0[i]*(zres.im*cosi[i] + zres.re*sinu[i]);
 #endif
                                         //sgrad_x[sindex] += (float)  sindex;
                                         sgrad_y[sindex] += (float) -sindex;
                                         //sindex++;
                                         //
                                         //grid_grad_x[index] += grad_x;
                                         //grid_grad_y[index] += grad_y;
                                 }
                                 //
                         }
                 }
                         __syncthreads();
 	return;
                 //
 #if 0
 		int sindex = threadIdx.x*grid_size;
 		for(int icol = 0; icol < grid_size; ++icol)
 		{
 			if (col + icol < nbgridcells)
 			{
                 		grid_grad_x[index + col] = sgrad_x[sindex];
                 		grid_grad_y[index + col] = sgrad_y[sindex];
 				sindex++;
 			}
 		}
 		__syncthreads();
 #endif
         }
 }
 
 
 
 __global__
 void
 module_potentialDerivatives_totalGradient_8_SOA_GPU_v2(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int i, int nhalos)
 {
 	//asm volatile("# module_potentialDerivatives_totalGradient_SOA begins");
 	// 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
 	//
-	struct point grad, clumpgrad, image_point;
+	struct point grad, image_point;
 	grad.x = 0;
 	grad.y = 0;
 	//
 	int row = blockIdx.y * blockDim.y + threadIdx.y;
 	int col = blockIdx.x * blockDim.x + threadIdx.x;
 	//
 	if ((row < nbgridcells) && (col < nbgridcells))
 	{
 		//
 		int index = col*nbgridcells + row;
 		//
 		//grid_grad_x[index] = 0.;
 		//grid_grad_y[index] = 0.;
 		//
 		type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
 		type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
 		//
 #if 0
 		/*SHARED*/ type_t img_pt[2];
 		if ((row == 0) && (col == 0))
 		{
 			img_pt[0] = frame->xmin + col*dx;
 			img_pt[1] = frame->ymin + row*dy;
 		}
 		__syncthreads();
 #else
 		image_point.x = frame->xmin + col*dx;
 		image_point.y = frame->ymin + row*dy;
 #endif
 		//
 		//
 		//for(int i = shalos; i < shalos + nhalos; i++)
 		//{
 		//IACA_START;
 		//
-		struct point true_coord, true_coord_rot; //, result;
+		struct point true_coord; //, result;
 		//type_t       R, angular_deviation;
-		complex      zis;
+		//complex      zis;
 		//
 		//result.x = result.y = 0.;
 		//
 #if 0
 		true_coord.x = img_pt[0] - __ldg(&lens->position_x[i]);
 		true_coord.y = img_pt[1] - __ldg(&lens->position_y[i]);
 #else
 		true_coord.x = image_point.x - __ldg(&lens->position_x[i]);
 		true_coord.y = image_point.y - __ldg(&lens->position_y[i]);
 #endif
 		type_t cosi = __ldg(&lens->anglecos[i]);
 		type_t sinu = __ldg(&lens->anglesin[i]);
 		// positionning at the potential center
 		// Change the origin of the coordinate system to the center of the clump
 		type_t x = true_coord.x*cosi + true_coord.y*sinu;
 		type_t y = true_coord.y*cosi - true_coord.x*sinu;
 		//
 		type_t eps = __ldg(&lens->ellipticity_potential[i]);
 		//
 		type_t sqe  = sqrt(eps);
 		//
 		type_t rem2 = x*x/((1. + eps)*(1. + eps)) + y*y/((1. - eps)*(1. - eps));
 		//
-		complex zci;
+		//complex zci;
 		complex znum, zden, zres;
 		type_t norm;
 		//
 		zci.im  = -0.5*(1. - eps*eps)/sqe;
 		//
 		type_t rc  = __ldg(&lens->rcore[i]);
 		type_t cx1  = (1. - eps)/(1. + eps);
 		znum.re = cx1*x;
 		znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1;
 		//
 		zden.re = x;
 		zden.im = 2.*rc*sqe - y;
 		norm    = (zden.re*zden.re + zden.im*zden.im);     // zis = znum/zden
 		//
 		zis.re  = (znum.re*zden.re + znum.im*zden.im)/norm;
 		zis.im  = (znum.im*zden.re - znum.re*zden.im)/norm;
 		//
 		type_t b0  = __ldg(&lens->b0[i]);
-		grad.x += b0*(zres.re*cosi - zres.im*sinu);
-		grad.y += b0*(zres.im*cosi + zres.re*sinu);
+		//grad.x += b0*(zres.re*cosi - zres.im*sinu);
+		//grad.y += b0*(zres.im*cosi + zres.re*sinu);
 		//
 		grid_grad_x[index] += grad.x;
 		grid_grad_y[index] += grad.y;
 	}
 }
 
-
+#endif
 
 __device__ point module_potentialDerivatives_totalGradient_5_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos){
     //asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins");
     //printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n");
     //
     struct point grad, result;
     grad.x = 0;
     grad.y = 0;
     for(int i = shalos; i < shalos + nhalos; i++)
     {
             //
-		struct point true_coord, true_coord_rotation;
+		struct point true_coord;
 		//
 		true_coord.x = pImage->x - lens->position_x[i];
 		true_coord.y = pImage->y - lens->position_y[i];
 		//
 		//true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
 		type_t cose = lens->anglecos[i];
 		type_t sine = lens->anglesin[i];
 		//
 		type_t x = true_coord.x*cose + true_coord.y*sine;
 		type_t y = true_coord.y*cose - true_coord.x*sine;
 		//
 		type_t ell_pot = lens->ellipticity_potential[i];
 		//
 		type_t b0_inv_R = lens->b0[i]/sqrt(x*x*(1 - ell_pot) + y*y*(1 + ell_pot));
 		//
 		result.x = (1 - ell_pot)*x*b0_inv_R;
 		result.y = (1 + ell_pot)*y*b0_inv_R;
 		//
 		grad.x += result.x*cose - result.y*sine;
 		grad.y += result.y*cose + result.x*sine;
     }
     return grad;
 
 }
 __device__ point module_potentialDerivatives_totalGradient_8_SOA_GPU(const struct point *image_point, const struct Potential_SOA *lens, int shalos, int nhalos){
 	struct point grad;
 	grad.x = 0;
 	grad.y = 0;
 	//
 	for(int i = shalos; i < shalos + nhalos; i++)
 	{
         complex      zis;
         // positioning at the potential center
         // Change the origin of the coordinate system to the center of the clump
         //@@if ((row == Ny) && (col == Nx)) printf("image_x = %f, %f image_y = %f, %f\n",  image_point.x, frame->xmin, image_point.y,frame->ymin);
 
         type_t true_coord_x = image_point->x - __ldg(&lens->position_x[i]);
         type_t true_coord_y = image_point->y - __ldg(&lens->position_y[i]);
         //if ((row == Ny) && (col == Nx)) printf("x = %f y = %f\n",  true_coord_x, true_coord_y);
         //
         type_t cosi = __ldg(&lens->anglecos[i]);
         type_t sinu = __ldg(&lens->anglesin[i]);
         //
         type_t x = true_coord_x*cosi + true_coord_y*sinu;
         type_t y = true_coord_y*cosi - true_coord_x*sinu;
         //
         //if ((row == Ny) && (col == Nx)) printf("x = %f y = %f\n",  x, y);
         //
         type_t eps = __ldg(&lens->ellipticity_potential[i]);
         //
         type_t sqe  = sqrt(eps);
         //
         type_t rem2 = x*x/((1. + eps)*(1. + eps)) + y*y/((1. - eps)*(1. - eps));
         //
         complex zci;
         complex znum, zden, zres;
         type_t norm;
         //
         zci.re  = 0;
         zci.im  = -0.5*(1. - eps*eps)/sqe;
         //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zci.re, zci.im);
         //
         type_t rc  = __ldg(&lens->rcore[i]);
         type_t cx1  = (1. - eps)/(1. + eps);
 
         znum.re = cx1*x;
         znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1;
         //
         zden.re = x;
         zden.im = 2.*rc*sqe - y;
         norm    = (zden.re*zden.re + zden.im*zden.im);     // zis = znum/zden
         //@@if ((col == Nx) && (row == Ny)) printf("norm = %f\n", norm);
         //
         zis.re  = (znum.re*zden.re + znum.im*zden.im)/norm;
         zis.im  = (znum.im*zden.re - znum.re*zden.im)/norm;
         //
         //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zis.re, zis.im);
         //
         norm    = zis.re;
         //
         zis.re  = log(sqrt(norm*norm + zis.im*zis.im));  // ln(zis) = ln(|zis|)+i.Arg(zis)
         zis.im  = atan2(zis.im, norm);
         //
         //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zis.re, zis.im);
         //
         zres.re = zci.re*zis.re - zci.im*zis.im;   // Re( zci*ln(zis) )
         zres.im = zci.im*zis.re + zis.im*zci.re;   // Im( zci*ln(zis) )
         //
         //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zres: %f %f\n", row, col, zres.re, zres.im);
         //
         type_t b0  = __ldg(&lens->b0[i]);
 
         grad.x += b0*(zres.re*cosi - zres.im*sinu);
         grad.y += b0*(zres.im*cosi + zres.re*sinu);
         //@@if ((col == Nx) && (row == Ny)) printf("grad: %f %f\n", grad.x, grad.y);
 	}
 	//IACA_END;
 	//
 	return(grad);
 
 }
 __device__ point module_potentialDerivatives_totalGradient_81_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos){
     //asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins");
     //std::cout << "# module_potentialDerivatives_totalGradient_81_SOA begins" << std::endl;
     // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
     //
-    struct point grad, clumpgrad;
+    struct point grad;
     grad.x = 0;
     grad.y = 0;
     for(int i = shalos; i < shalos + nhalos; i++)
     {
 		//IACA_START;
 		//
-		struct point true_coord, true_coord_rot; //, result;
+		struct point true_coord; //, result;
 		//type_t       R, angular_deviation;
 		complex      zis;
 		//
 		//result.x = result.y = 0.;
 		//
 		true_coord.x = pImage->x - lens->position_x[i];
 		true_coord.y = pImage->y - lens->position_y[i];
 		/*positionning at the potential center*/
 		// Change the origin of the coordinate system to the center of the clump
 		type_t cose = lens->anglecos[i];
 		type_t sine = lens->anglesin[i];
 		type_t x = true_coord.x*cose + true_coord.y*sine;
 		type_t y = true_coord.y*cose - true_coord.x*sine;
 		//
 		type_t eps  = lens->ellipticity_potential[i];
 		type_t rc   = lens->rcore[i];
 		type_t rcut = lens->rcut[i];
 		type_t b0   = lens->b0[i];
 		type_t t05  = b0*rcut/(rcut - rc);
 		//
 		type_t sqe  = sqrt(eps);
 		//
 		type_t cx1  = (1. - eps)/(1. + eps);
 		type_t cxro = (1. + eps)*(1. + eps);
 		type_t cyro = (1. - eps)*(1. - eps);
 		//
 		type_t rem2 = x*x/cxro + y*y/cyro;
 		//
 		complex zci, znum, zden, zres_rc, zres_rcut;
 		type_t norm;
 		//
 		zci.re  = 0;
 		zci.im  = -0.5*(1. - eps*eps)/sqe;
 		//
 		// step 1
 		{
 		KERNEL(rc, zres_rc)
 		}
 		// step 2
 		{
 		KERNEL(rcut, zres_rcut)
 		}
 		zis.re  = t05*(zres_rc.re - zres_rcut.re);
 		zis.im  = t05*(zres_rc.im - zres_rcut.im);
 		// rotation
 		grad.x += (zis.re*cose - zis.im*sine);
 		grad.y += (zis.im*cose + zis.re*sine);
             //
     }
     //
     return(grad);
 
 }
 
 #if 1
 typedef struct point (*halo_func_GPU_t) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos);
 
 __constant__ halo_func_GPU_t halo_func_GPU[100] =
 {
 	0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_5_SOA_GPU, 0, 0, module_potentialDerivatives_totalGradient_8_SOA_GPU,  0,
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0,  module_potentialDerivatives_totalGradient_81_SOA_GPU, 0, 0, 0, 0, 0, 0, 0, 0,
 	   0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 	   };
 #endif
 
 __global__
 void
 module_potentialDerivatives_totalGradient_SOA_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart)
 {
         struct point grad, clumpgrad, image_point;
         //
         int col = blockIdx.x*blockDim.x + threadIdx.x;
         int row = blockIdx.y*blockDim.y + threadIdx.y;
 
         //
         if ((row + jstart < nbgridcells_y) && (col + istart < nbgridcells_x))
         {
                 //
                 //double dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
                 //double dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
                 //
                 int index = row*nbgridcells_x + col;
                 // Create temp grad variable to minimise writing to global memory grid_grad
                 grad.x = 0.;
                 grad.y = 0.;
                 //
                 image_point.x = frame->xmin + (col + istart)*dx;
                 image_point.y = frame->ymin + (row + jstart)*dy;
                 //
                 int shalos = 0;
                 while (shalos < nhalos)
                 {
                         int lens_type = lens->type[shalos];
                         int count     = 1;
                         while (lens->type[shalos + count] == lens_type) count++;
                         //
                         //if(row == 0 && col == 0) printf("type = %d, count %d , shalos %d \n", lens_type,count,shalos );
                         //
                         clumpgrad = (*halo_func_GPU[lens_type])(&image_point, lens, shalos, count);
                         //
                         grad.x += clumpgrad.x;
                         grad.y += clumpgrad.y;
                         shalos += count;
                 }
                 // Write to global memory
                 grid_grad_x[index] = grad.x;
                 grid_grad_y[index] = grad.y;
                 //if ((row == 0) && (col == 9))
                 //printf("%f %f: %f %f\n",  image_point.x, image_point.y, grid_grad_x[index], grid_grad_y[index]);
         }
 }
 
 __global__
 void
 module_potentialDerivatives_totalGradient_SOA_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int nhalos)
 {
     struct point grad, clumpgrad, image_point;
     //
     int col = blockIdx.x*blockDim.x + threadIdx.x;
     int row = blockIdx.y*blockDim.y + threadIdx.y;
 
     //
     if ((row < nbgridcells) && (col < nbgridcells))
     {
         //
         type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
         type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
 		//
 		int index = row*nbgridcells + col;
 		// Create temp grad variable to minimise writing to global memory grid_grad
 		grad.x = 0;
 		grad.y = 0;
 		//
 		image_point.x = frame->xmin + col*dx;
 		image_point.y = frame->ymin + row*dy;
 		//
 		int shalos = 0;
 		while (shalos < nhalos)
 		{
 			int lens_type = lens->type[shalos];
 			int count     = 1;
 			while (lens->type[shalos + count] == lens_type) count++;
 			//
 			//if(row == 0 && col == 0) printf("type = %d, count %d , shalos %d \n", lens_type,count,shalos );
 			//
 			clumpgrad = (*halo_func_GPU[lens_type])(&image_point, lens, shalos, count);
 			//
 			grad.x += clumpgrad.x;
 			grad.y += clumpgrad.y;
 			shalos += count;
 		}
 		// Write to global memory
 		grid_grad_x[index] = grad.x;
 		grid_grad_y[index] = grad.y;
 
 	//if ((row == 0) && (col == 9))
 	//printf("%f %f: %f %f\n",  image_point.x, image_point.y, grid_grad_x[index], grid_grad_y[index]);
     }
 
 }
 
diff --git a/src/grid_gradient2_GPU.cu b/src/grid_gradient2_GPU.cu
index 72aff04..13540d0 100644
--- a/src/grid_gradient2_GPU.cu
+++ b/src/grid_gradient2_GPU.cu
@@ -1,224 +1,220 @@
 //
 //
 //
 #include <fstream>
 #include "grid_gradient2_GPU.cuh"
 #include "gradient2_GPU.cuh"
 #include <structure_hpc.hpp>
 
 #define BLOCK_SIZE_X 32
 #define BLOCK_SIZE_Y 16
 
 //#define ROT
 
 #define _SHARED_MEM
 
 #ifdef _SHARED_MEM
 #define SHARED __shared__
 #warning "shared memory"
 extern __shared__ type_t shared[];
 #else
 #define SHARED 
 #endif
 
 #define Nx 1
 #define Ny 0
 
 
 #define cudasafe 
 
 extern "C" 
 {
 	type_t myseconds();
 }
 
 __global__ void module_potentialDerivatives_totalGradient2_SOA_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d,  const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int nhalos);
 
 ////
 void
 module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(type_t *grid_grad2_a, type_t *grid_grad_b, type_t *grid_grad2_c, type_t *grid_grad_d, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos);
 //
 void gradient2_grid_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart);
 //
 //void
 //module_potentialDerivatives_totalGradient_SOA_CPU_GPU_v2(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_cpu, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos);
 //
 //
 //
 void gradient2_grid_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos ,int nbgridcells)
 {
 	type_t dx = (frame->xmax - frame->xmin)/(nbgridcells - 1);
     type_t dy = (frame->ymax - frame->ymin)/(nbgridcells - 1);
         //
     gradient2_grid_GPU(grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d, frame, lens, nhalos, dx, dy, nbgridcells, nbgridcells, 0, 0);
 }
 //
 //
 //
 void gradient2_grid_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart)
 {
 
 	int nBlocks_gpu = 0;
 	// Define the number of threads per block the GPU will use
 	cudaDeviceProp properties_gpu;
 
 	cudaGetDeviceProperties(&properties_gpu, 0); // Get properties of 0th GPU in use
 
 	if (properties_gpu.maxThreadsDim[0]<threadsPerBlock)
 	{
 		fprintf(stderr, "ERROR: The GPU has to support at least %u threads per block.\n", threadsPerBlock);
 		exit(-1);
 	}
 	else
 	{
 		nBlocks_gpu = properties_gpu.maxGridSize[0] / threadsPerBlock;  // Get the maximum number of blocks with the chosen number of threads
 		// per Block that the GPU supports
 	}
 
 	grid_param *frame_gpu;
 	Potential_SOA *lens_gpu,*lens_kernel;
 	int *type_gpu;
 	type_t *lens_x_gpu, *lens_y_gpu, *b0_gpu, *angle_gpu, *epot_gpu, *rcore_gpu, *rcut_gpu, *anglecos_gpu, *anglesin_gpu;
 	type_t *grid_grad2_a_gpu, *grid_grad2_b_gpu , *grid_grad2_c_gpu, *grid_grad2_d_gpu;
 
 	lens_gpu = (Potential_SOA *) malloc(sizeof(Potential_SOA));
 	lens_gpu->type = (int *) malloc(sizeof(int));
 
 	// Allocate variables on the GPU
 	cudasafe(cudaMalloc( (void**)&(lens_kernel), sizeof(Potential_SOA)),"Gradient2gpu.cu : Alloc Potential_SOA: " );
 	cudasafe(cudaMalloc( (void**)&(type_gpu), nhalos*sizeof(int)),"Gradient2gpu.cu : Alloc type_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(lens_x_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc x_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(lens_y_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc y_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(b0_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc b0_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(angle_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc angle_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(epot_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc epot_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(rcore_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc rcore_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(rcut_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc rcut_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(anglecos_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc anglecos_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(anglesin_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc anglesin_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradient2gpu.cu : Alloc frame_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(grid_grad2_a_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_a_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(grid_grad2_b_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_b_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(grid_grad2_c_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_c_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(grid_grad2_d_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_d_gpu: " );
 	// Copy values to the GPU
 	//
 	cudasafe(cudaMemcpy(type_gpu,lens->type , nhalos*sizeof(int),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy type_gpu: " );
 	cudasafe(cudaMemcpy(lens_x_gpu,lens->position_x , nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy x_gpu: " );
 	cudasafe(cudaMemcpy(lens_y_gpu,lens->position_y , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy y_gpu: " );
 	cudasafe(cudaMemcpy(b0_gpu,lens->b0 , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2pu.cu : Copy b0_gpu: " );
 	cudasafe(cudaMemcpy(angle_gpu,lens->ellipticity_angle , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy angle_gpu: " );
 	cudasafe(cudaMemcpy(epot_gpu, lens->ellipticity_potential, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy epot_gpu: " );
 	cudasafe(cudaMemcpy(rcore_gpu, lens->rcore, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy rcore_gpu: " );
 	cudasafe(cudaMemcpy(rcut_gpu, lens->rcut, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy rcut_gpu: " );
 	cudasafe(cudaMemcpy(anglecos_gpu, lens->anglecos, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy anglecos: " );
 	cudasafe(cudaMemcpy(anglesin_gpu, lens->anglesin, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy anglesin: " );
 	cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy fame_gpu: " );
 	//
 	lens_gpu->type 			= type_gpu;
 	lens_gpu->position_x 		= lens_x_gpu;
 	lens_gpu->position_y 		= lens_y_gpu;
 	lens_gpu->b0 			= b0_gpu;
 	lens_gpu->ellipticity_angle 	= angle_gpu;
 	lens_gpu->ellipticity_potential = epot_gpu;
 	lens_gpu->rcore 		= rcore_gpu;
 	lens_gpu->rcut 			= rcut_gpu;
 	lens_gpu->anglecos 		= anglecos_gpu;
 	lens_gpu->anglesin 		= anglesin_gpu;
 	//
 	cudaMemcpy(lens_kernel, lens_gpu, sizeof(Potential_SOA), cudaMemcpyHostToDevice);
 	//
 	type_t time = -myseconds();
 	//module_potentialDerivatives_totalGradient_SOA_CPU_GPU(grid_grad_x_gpu, grid_grad_y_gpu, frame_gpu, lens_kernel, nbgridcells_x, nhalos);
 	module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(grid_grad2_a_gpu, grid_grad2_b_gpu, grid_grad2_c_gpu, grid_grad2_d_gpu, frame_gpu, lens_kernel, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart);
 	//
 	//cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU");
 	cudaDeviceSynchronize();
 	time += myseconds();
 	//std::cout << "	kernel time = " << time << " s." << std::endl;
 	//
 
 	cudasafe(cudaMemcpy( grid_grad2_a, grid_grad2_a_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost )," --- Gradient2gpu.cu : Copy source_a_gpu: " );
 	cudasafe(cudaMemcpy( grid_grad2_b, grid_grad2_b_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost)," --- Gradient2gpu.cu : Copy source_b_gpu: " );
 	cudasafe(cudaMemcpy( grid_grad2_c, grid_grad2_c_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost )," --- Gradient2gpu.cu : Copy source_c_gpu: " );
 	cudasafe(cudaMemcpy( grid_grad2_d, grid_grad2_d_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost)," --- Gradient2gpu.cu : Copy source_d_gpu: " );
 	//
 	//printf("-----> %f %f \n",grid_grad_x[Nx], grid_grad_y[Ny]);
 	// Free GPU memory
 	cudaFree(lens_gpu);
 	cudaFree(type_gpu);
 	cudaFree(lens_x_gpu);
 	cudaFree(lens_y_gpu);
 	cudaFree(b0_gpu);
 	cudaFree(angle_gpu);
 	cudaFree(epot_gpu);
 	cudaFree(rcore_gpu);
 	cudaFree(rcut_gpu);
 	cudaFree(anglecos_gpu);
 	cudaFree(anglesin_gpu);
 	cudaFree(grid_grad2_a_gpu);
 	cudaFree(grid_grad2_b_gpu);
 	cudaFree(grid_grad2_c_gpu);
 	cudaFree(grid_grad2_d_gpu);
 }
 
 void 
 module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart)
 {
         int GRID_SIZE_X = (nbgridcells_x + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks
         int GRID_SIZE_Y = (nbgridcells_y + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y;
         //
         printf("grid_size_x = %d, grid_size_y = %d, nbgridcells_x = %d, nbgridcells_y = %d, istart = %d, jstart = %d (split)\n", GRID_SIZE_X, GRID_SIZE_Y, nbgridcells_x, nbgridcells_y, istart, jstart);
         //
         dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1);
         dim3 grid   (GRID_SIZE_X , GRID_SIZE_Y);
-        //
-        int count = nhalos;
         //printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (double) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(double));
         printf("nhalos = %d, size of shared memory = %lf (split)\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t));
         //
         cudaMemset(grid_grad2_a, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t));
         cudaMemset(grid_grad2_b, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t));
         cudaMemset(grid_grad2_c, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t));
         cudaMemset(grid_grad2_d, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t));
         //
         //module_potentialDerivatives_totalGradient_SOA_GPU<<<grid, threads>>> (grid_grad_x, grid_grad_y, lens, frame, nhalos, nbgridcells_x);
         module_potentialDerivatives_totalGradient2_SOA_GPU<<<grid, threads>>> (grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d,  lens_gpu, frame, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart);
         cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU");
         //
         cudaDeviceSynchronize();
         printf("GPU kernel done...\n");
 }
 //
 //
 //
 void
 module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos)
 {
 	//
 	int GRID_SIZE_X = (nbgridcells + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks
 	int GRID_SIZE_Y = (nbgridcells + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y; 
 	//
 	type_t* timer = (type_t *) malloc((int) nbgridcells*nbgridcells*sizeof(type_t));
-	type_t* dtimer;
 	//
 	dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1);
 	dim3 grid   (GRID_SIZE_X , GRID_SIZE_Y);	
 	//
-	int count = nhalos;	
 	//printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(type_t));
 	printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t));
 	//
 	cudaMemset(grid_grad2_a, 0, nbgridcells*nbgridcells*sizeof(type_t));
 	cudaMemset(grid_grad2_b, 0, nbgridcells*nbgridcells*sizeof(type_t));
 	cudaMemset(grid_grad2_c, 0, nbgridcells*nbgridcells*sizeof(type_t));
 	cudaMemset(grid_grad2_d, 0, nbgridcells*nbgridcells*sizeof(type_t));
 	//
 	//module_potentialDerivatives_totalGradient2_SOA_GPU<<<grid, threads>>> (grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d, lens_gpu, frame, nbgridcells, nhalos);
 	cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU");
 	//
 	cudaDeviceSynchronize();
 	printf("GPU kernel done...\n");
 }
 
 
diff --git a/src/grid_gradient_GPU.cu b/src/grid_gradient_GPU.cu
index b25f4d3..d5b8db3 100644
--- a/src/grid_gradient_GPU.cu
+++ b/src/grid_gradient_GPU.cu
@@ -1,358 +1,343 @@
 //
 //
 //
 #include <fstream>
 #include "grid_gradient_GPU.cuh"
 #include "gradient_GPU.cuh"
 
 #define BLOCK_SIZE_X 32
 #define BLOCK_SIZE_Y 16
 
 //#define ROT
 
 #define _SHARED_MEM
 
 #ifdef _SHARED_MEM
 #define SHARED __shared__
 #warning "shared memory"
 extern __shared__ type_t shared[];
 #else
 #define SHARED 
 #endif
 
 #define Nx 1
 #define Ny 0
 
 
 #define cudasafe 
 
 extern "C" 
 {
 	type_t myseconds();
 }
 
 __global__ 
 void module_potentialDerivatives_totalGradient_SOA_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int nhalos);
 
 //
 void 
 module_potentialDerivatives_totalGradient_SOA_CPU_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart);
 //
 void
 module_potentialDerivatives_totalGradient_SOA_CPU_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos);
 //
 void gradient_grid_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart);
 
 //
 //void
 //module_potentialDerivatives_totalGradient_SOA_CPU_GPU_v2(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_cpu, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos);
 //
 //
 //
 void gradient_grid_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos ,int nbgridcells)
 {
 	type_t dx = (frame->xmax - frame->xmin)/(nbgridcells - 1);
         type_t dy = (frame->ymax - frame->ymin)/(nbgridcells - 1);
         //
         gradient_grid_GPU(grid_grad_x, grid_grad_y, frame, lens, nhalos, dx, dy, nbgridcells, nbgridcells, 0, 0);
 }
 //
 //
 //
 void gradient_grid_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart)
 {
 
 
 	int nBlocks_gpu = 0;
 	// Define the number of threads per block the GPU will use
 	cudaDeviceProp properties_gpu;
 
 	cudaGetDeviceProperties(&properties_gpu, 0); // Get properties of 0th GPU in use
 
 	if (properties_gpu.maxThreadsDim[0]<threadsPerBlock)
 	{
 		fprintf(stderr, "ERROR: The GPU has to support at least %u threads per block.\n", threadsPerBlock);
 		exit(-1);
 	}
 	else
 	{
 		nBlocks_gpu = properties_gpu.maxGridSize[0] / threadsPerBlock;  // Get the maximum number of blocks with the chosen number of threads
 		// per Block that the GPU supports
 	}
 
 	grid_param *frame_gpu;
 	Potential_SOA *lens_gpu,*lens_kernel;
 	int *type_gpu;
 	type_t *lens_x_gpu, *lens_y_gpu, *b0_gpu, *angle_gpu, *epot_gpu, *rcore_gpu, *rcut_gpu, *anglecos_gpu, *anglesin_gpu;
 	type_t *grid_grad_x_gpu, *grid_grad_y_gpu ;
 
 	lens_gpu = (Potential_SOA *) malloc(sizeof(Potential_SOA));
 	lens_gpu->type = (int *) malloc(sizeof(int));
 
 	// Allocate variables on the GPU
 	cudasafe(cudaMalloc( (void**)&(lens_kernel), sizeof(Potential_SOA)),"Gradientgpu.cu : Alloc Potential_SOA: " );
 	cudasafe(cudaMalloc( (void**)&(type_gpu), nhalos*sizeof(int)),"Gradientgpu.cu : Alloc type_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(lens_x_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc x_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(lens_y_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc y_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(b0_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc b0_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(angle_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc angle_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(epot_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc epot_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(rcore_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc rcore_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(rcut_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc rcut_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(anglecos_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc anglecos_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(anglesin_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc anglesin_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradientgpu.cu : Alloc frame_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(grid_grad_x_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradientgpu.cu : Alloc source_x_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(grid_grad_y_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradientgpu.cu : Alloc source_y_gpu: " );
 
 	// Copy values to the GPU
 	//
 	cudasafe(cudaMemcpy(type_gpu,lens->type , nhalos*sizeof(int),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy type_gpu: " );
 	cudasafe(cudaMemcpy(lens_x_gpu,lens->position_x , nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy x_gpu: " );
 	cudasafe(cudaMemcpy(lens_y_gpu,lens->position_y , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy y_gpu: " );
 	cudasafe(cudaMemcpy(b0_gpu,lens->b0 , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy b0_gpu: " );
 	cudasafe(cudaMemcpy(angle_gpu,lens->ellipticity_angle , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy angle_gpu: " );
 	cudasafe(cudaMemcpy(epot_gpu, lens->ellipticity_potential, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy epot_gpu: " );
 	cudasafe(cudaMemcpy(rcore_gpu, lens->rcore, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy rcore_gpu: " );
 	cudasafe(cudaMemcpy(rcut_gpu, lens->rcut, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy rcut_gpu: " );
 	cudasafe(cudaMemcpy(anglecos_gpu, lens->anglecos, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy anglecos: " );
 	cudasafe(cudaMemcpy(anglesin_gpu, lens->anglesin, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy anglesin: " );
 	cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy fame_gpu: " );
 	//
 	lens_gpu->type 			= type_gpu;
 	lens_gpu->position_x 		= lens_x_gpu;
 	lens_gpu->position_y 		= lens_y_gpu;
 	lens_gpu->b0 			= b0_gpu;
 	lens_gpu->ellipticity_angle 	= angle_gpu;
 	lens_gpu->ellipticity_potential = epot_gpu;
 	lens_gpu->rcore 		= rcore_gpu;
 	lens_gpu->rcut 			= rcut_gpu;
 	lens_gpu->anglecos 		= anglecos_gpu;
 	lens_gpu->anglesin 		= anglesin_gpu;
 	//
 	cudaMemcpy(lens_kernel, lens_gpu, sizeof(Potential_SOA), cudaMemcpyHostToDevice);
 	//
 	type_t time = -myseconds();
 	//module_potentialDerivatives_totalGradient_SOA_CPU_GPU(grid_grad_x_gpu, grid_grad_y_gpu, frame_gpu, lens_kernel, nbgridcells_x, nhalos);
 	module_potentialDerivatives_totalGradient_SOA_CPU_GPU(grid_grad_x_gpu, grid_grad_y_gpu, frame_gpu, lens_kernel, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart);
 	//
 	//cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU");
 	cudaDeviceSynchronize();
 	time += myseconds();
 	//std::cout << "	kernel time = " << time << " s." << std::endl;
 	//
 
 	cudasafe(cudaMemcpy( grid_grad_x, grid_grad_x_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost )," --- Gradientgpu.cu : Copy source_x_gpu: " );
 	cudasafe(cudaMemcpy( grid_grad_y, grid_grad_y_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost)," --- Gradientgpu.cu : Copy source_y_gpu: " );
 	//
 	//printf("-----> %f %f \n",grid_grad_x[Nx], grid_grad_y[Ny]);
 	// Free GPU memory
 	cudaFree(lens_gpu);
 	cudaFree(type_gpu);
 	cudaFree(lens_x_gpu);
 	cudaFree(lens_y_gpu);
 	cudaFree(b0_gpu);
 	cudaFree(angle_gpu);
 	cudaFree(epot_gpu);
 	cudaFree(rcore_gpu);
 	cudaFree(rcut_gpu);
 	cudaFree(anglecos_gpu);
 	cudaFree(anglesin_gpu);
 	cudaFree(grid_grad_x_gpu);
 	cudaFree(grid_grad_y_gpu);
 }
 
 #if 0
 __global__
 void
 module_potentialDerivatives_totalGradient_8_SOA_GPU_loc(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA
  *lens, const struct grid_param *frame, int nbgridcells, int shalos, int nhalos)
 {
         //
         //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins");
         // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
         //
         type_t grad_x, grad_y;
         type_t clumpgrad_x, clumpgrad_y;
         type_t image_point_x, image_point_y;
         //
         SHARED type_t cosi      [200];
         SHARED type_t sinu      [200];
         SHARED type_t rci       [200];
         SHARED type_t b0        [200];
         SHARED type_t epsi      [200];
         SHARED type_t position_x[200];
         SHARED type_t position_y[200];
         SHARED type_t rsqe      [200];
         //SHARED type_t sgrad_x   [(BLOCK_SIZE_X + 1)*BLOCK_SIZE_Y];
         //SHARED type_t sgrad_y   [(BLOCK_SIZE_X + 1)*BLOCK_SIZE_Y];
 
         //SHARED type_t sonepeps  [200];
         //SHARED type_t sonemeps  [200];
         //
         grad_x         = 0;
         grad_y         = 0;
         //
         int row        = blockIdx.y*blockDim.y + threadIdx.y;
         int col        = blockIdx.x*blockDim.x + threadIdx.x;
         //
         //int loc_row    = threadIdx.x;
         //int loc_col    = threadIdx.y*blockDim.x + threadIdx.x;
         //
         //int grid_size  = nbgridcells/blockDim.y;
         //
         //if (threadIdx.x == 0) printf("%d %d %d: row = %d, col = %d, grid_size = %d\n", blockIdx.y, gridDim.y, threadIdx.y, row, col, grid_size);
         //
         type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
         type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
         //if (threadIdx.x == 0) printf("dx = %f, dy = %f\n", dx, dy);
         //
         image_point_x = frame->xmin + col*dx;
         image_point_y = frame->ymin + row*dy;
         //
         //int iloc  = threadIdx.x*blockDim.y + threadIdx.y;
         int iglob = row*nbgridcells + col;
         int numThreads = blockDim.x*blockDim.y;
         //
         for (int i = 0; i < (nhalos + numThreads - 1)/numThreads; ++i)
         {
                 int iloc  = threadIdx.y*blockDim.x + threadIdx.x + i*numThreads;
                 if (iloc < nhalos)
                 {
                         cosi[iloc]       = __ldg(&lens->anglecos             [shalos + iloc]);
                         sinu[iloc]       = __ldg(&lens->anglesin             [shalos + iloc]);
                         position_x[iloc] = __ldg(&lens->position_x           [shalos + iloc]);
                         position_y[iloc] = __ldg(&lens->position_y           [shalos + iloc]);
                         rci[iloc]        = __ldg(&lens->rcore                [shalos + iloc]);
                         b0[iloc]         = __ldg(&lens->b0                   [shalos + iloc]);
                         epsi[iloc]       = __ldg(&lens->ellipticity_potential[shalos + iloc]);
                         rsqe[iloc]       = sqrt(epsi[iloc]);
                 }
         }
         __syncthreads();
         //
         if ((row < nbgridcells) && (col < nbgridcells))
         {
                 //
                 for(int i = 0; i < nhalos; i++)
                 {
                         //int index  = iloc;
 #if 1
                         type_t rc      = rci[i];
                         type_t eps     = epsi[i];
                         type_t onemeps = 1 - eps;
                         type_t onepeps = 1 + eps;
                         //
                         type_t sqe = rsqe[i];
                         type_t cx1 = onemeps/onepeps;
                         //
                         //
                         //type_t zci_im = 1;
                         type_t zci_im  = -0.5*(1. - eps*eps)/sqe;
                         type_t inv_onepeps = 1./(onepeps*onepeps);
                         type_t inv_onemeps = 1./(onemeps*onemeps);
 #endif
                         //
                         {
                                 //KERNEL_8;
                                 type_t grad_x = grad_y = 0.;
                                 type_t true_coord_y = image_point_y - position_y[i];
                                 type_t true_coord_x = image_point_x - position_x[i];
                                 KERNEL_8_reg(0);
                                 grid_grad_x[iglob +  0] += grad_x;
                                 grid_grad_y[iglob +  0] += grad_y;
                         }
                         /*
                         {
                                 //KERNEL_8;
                                 type_t grad_x = grad_y = 0.;
                                 type_t true_coord_y = image_point_y - position_y[i];
                                 type_t true_coord_x = image_point_x - position_x[i] + BLOCK_SIZE_X/2*dx;
                                 KERNEL_8_reg(0);
                                 grid_grad_x[iglob + BLOCK_SIZE_X/2] += grad_x;
                                 grid_grad_y[iglob + BLOCK_SIZE_X/2] += grad_y;
                         }
                         */
                 //
                 }
         }
 }
 
 
 #endif
 void 
 module_potentialDerivatives_totalGradient_SOA_CPU_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart)
 {
-        struct point grad, clumpgrad;
-        //
-        grad.x = clumpgrad.x = 0.;
-        grad.y = clumpgrad.y = 0.;
-        //
-        int shalos = 0;
         //
         int GRID_SIZE_X = (nbgridcells_x + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks
         int GRID_SIZE_Y = (nbgridcells_y + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y;
 	//
         printf("grid_size_x = %d, grid_size_y = %d, nbgridcells_x = %d, nbgridcells_y = %d, istart = %d, jstart = %d (split)\n", GRID_SIZE_X, GRID_SIZE_Y, nbgridcells_x, nbgridcells_y, istart, jstart);
         //
         dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1);
         dim3 grid   (GRID_SIZE_X , GRID_SIZE_Y);
         //
-        int count = nhalos;
-        //printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (double) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(double))
-;
         printf("nhalos = %d, size of shared memory = %lf (split)\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t));
         //
         cudaMemset(grid_grad_x, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t));
         cudaMemset(grid_grad_y, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t));
 	//
         //module_potentialDerivatives_totalGradient_SOA_GPU<<<grid, threads>>> (grid_grad_x, grid_grad_y, lens, frame, nhalos, nbgridcells_x);
         module_potentialDerivatives_totalGradient_SOA_GPU<<<grid, threads>>> (grid_grad_x, grid_grad_y, lens_gpu, frame, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart);
         cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU");
 	//
         cudaDeviceSynchronize();
         printf("GPU kernel done...\n");
 }
 //
 //
 //
 void
 module_potentialDerivatives_totalGradient_SOA_CPU_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos)
 {
-	struct point grad, clumpgrad;
-	//
-	grad.x = clumpgrad.x = 0.;
-	grad.y = clumpgrad.y = 0.;
-	//
-	int shalos = 0;
 	//
 	int GRID_SIZE_X = (nbgridcells + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks
 	int GRID_SIZE_Y = (nbgridcells + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y; 
 	//printf("grid_size_x = %d, grid_size_y = %d\n", GRID_SIZE_X, GRID_SIZE_Y);
 	//
 	type_t* timer = (type_t *) malloc((int) nbgridcells*nbgridcells*sizeof(type_t));
 	type_t* dtimer;
 	//cudasafe(cudaMalloc( (void**)&(dtimer), (int) nbgridcells*nbgridcells*sizeof(type_t)),"Gradientgpu.cu : totalGradient_SOA_CPU_GPU: " );
 	//
 	//{
 	//dim3 dimBlock(BLOCK_SIZE_X/1, BLOCK_SIZE_Y);
 	//dim3 dimGrid (GRID_SIZE_X   , GRID_SIZE_Y );	
 	dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1);
 	dim3 grid   (GRID_SIZE_X , GRID_SIZE_Y);	
 	//
-	int count = nhalos;	
+	//int count = nhalos;
 	//printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(type_t));
 	printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t));
 	//
 	cudaMemset(grid_grad_x, 0, nbgridcells*nbgridcells*sizeof(type_t));
 	cudaMemset(grid_grad_y, 0, nbgridcells*nbgridcells*sizeof(type_t));
 		//
 	module_potentialDerivatives_totalGradient_SOA_GPU<<<grid, threads>>> (grid_grad_x, grid_grad_y, lens_gpu, frame, nbgridcells, nhalos);
 	cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU");
 		
 	//}
 	cudaDeviceSynchronize();
 	printf("GPU kernel done...\n");
 }
 
 
diff --git a/src/grid_map_GPU.cu b/src/grid_map_GPU.cu
index e81bbbd..52dc3df 100644
--- a/src/grid_map_GPU.cu
+++ b/src/grid_map_GPU.cu
@@ -1,267 +1,262 @@
 //
 //
 //
 #include <fstream>
 #include "grid_map_GPU.cuh"
 #include "gradient2_GPU.cuh"
 #include <structure_hpc.hpp>
 
 #define BLOCK_SIZE_X 32
 #define BLOCK_SIZE_Y 16
 
 //#define ROT
 
 #define _SHARED_MEM
 
 #ifdef _SHARED_MEM
 #define SHARED __shared__
 #warning "shared memory"
 extern __shared__ type_t shared[];
 #else
 #define SHARED 
 #endif
 
 #define Nx 1
 #define Ny 0
 
 
 #define cudasafe 
 
 extern "C" 
 {
 	type_t myseconds();
 }
 //
 //
 //void amplif_grid_CPU_GPU(type_t *map,type_t *grid_grad2_a,type_t *grid_grad2_b,type_t *grid_grad2_c,type_t *grid_grad2_d, type_t dl0s, type_t z,int mode_amp,int nhalos,int nbgridcells_x, int nbgridcells_y);
 //map_gpu_function_t amplif_grid_CPU_GPU(type_t *map,type_t *grid_grad2_a,type_t *grid_grad2_b,type_t *grid_grad2_c,type_t *grid_grad2_d, type_t dl0s, type_t z,int mode_amp,int nhalos,int nbgridcells_x, int nbgridcells_y);
 
 __global__ void amplif_grid_GPU(type_t *ampli,type_t *grid_grad2_a,type_t *grid_grad2_b,type_t *grid_grad2_c,type_t *grid_grad2_d, type_t dl0s, type_t z,int nbgridcells);
 //
 void map_grid_GPU(map_gpu_function_t mapfunction, type_t *map, const struct cosmo_param *cosmo, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos ,int nbgridcells,int mode_amp, type_t z )
 {
 	type_t dx = (frame->xmax - frame->xmin)/(nbgridcells - 1);
     type_t dy = (frame->ymax - frame->ymin)/(nbgridcells - 1);
         //
     map_grid_GPU(mapfunction,map,cosmo, frame, lens, nhalos,mode_amp,z, dx, dy, nbgridcells, nbgridcells, 0, 0);
 }
 //
 void map_grid_GPU(map_gpu_function_t mapfunction, type_t *map,const struct cosmo_param *cosmo, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, int mode_amp, type_t z, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart)
 {
 
 	int nBlocks_gpu = 0;
 	// Define the number of threads per block the GPU will use
 	cudaDeviceProp properties_gpu;
-
 	cudaGetDeviceProperties(&properties_gpu, 0); // Get properties of 0th GPU in use
 
 	if (properties_gpu.maxThreadsDim[0]<threadsPerBlock)
 	{
 		fprintf(stderr, "ERROR: The GPU has to support at least %u threads per block.\n", threadsPerBlock);
 		exit(-1);
 	}
 	else
 	{
 		nBlocks_gpu = properties_gpu.maxGridSize[0] / threadsPerBlock;  // Get the maximum number of blocks with the chosen number of threads
 		// per Block that the GPU supports
 	}
 
 	grid_param *frame_gpu;
 	Potential_SOA *lens_gpu,*lens_kernel;
 	int *type_gpu;
 	type_t *lens_x_gpu, *lens_y_gpu, *b0_gpu, *angle_gpu, *epot_gpu, *rcore_gpu, *rcut_gpu, *anglecos_gpu, *anglesin_gpu;
 	type_t *grid_grad2_a_gpu, *grid_grad2_b_gpu , *grid_grad2_c_gpu, *grid_grad2_d_gpu, *map_gpu;
 
 	type_t dl0s = module_cosmodistances_objectObject(lens->z[0], z, *cosmo);
 	//type_t dos = module_cosmodistances_observerObject(z, *cosmo);
 
 	lens_gpu = (Potential_SOA *) malloc(sizeof(Potential_SOA));
 	lens_gpu->type = (int *) malloc(sizeof(int));
 
 	// Allocate variables on the GPU
 	cudasafe(cudaMalloc( (void**)&(lens_kernel), sizeof(Potential_SOA)),"Gradient2gpu.cu : Alloc Potential_SOA: " );
 	cudasafe(cudaMalloc( (void**)&(type_gpu), nhalos*sizeof(int)),"Gradient2gpu.cu : Alloc type_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(lens_x_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc x_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(lens_y_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc y_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(b0_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc b0_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(angle_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc angle_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(epot_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc epot_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(rcore_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc rcore_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(rcut_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc rcut_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(anglecos_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc anglecos_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(anglesin_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc anglesin_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradient2gpu.cu : Alloc frame_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(grid_grad2_a_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_a_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(grid_grad2_b_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_b_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(grid_grad2_c_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_c_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(grid_grad2_d_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_d_gpu: " );
 	cudasafe(cudaMalloc( (void**)&(map_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc map: " );
 	// Copy values to the GPU
 	//
 	cudasafe(cudaMemcpy(type_gpu,lens->type , nhalos*sizeof(int),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy type_gpu: " );
 	cudasafe(cudaMemcpy(lens_x_gpu,lens->position_x , nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy x_gpu: " );
 	cudasafe(cudaMemcpy(lens_y_gpu,lens->position_y , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy y_gpu: " );
 	cudasafe(cudaMemcpy(b0_gpu,lens->b0 , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2pu.cu : Copy b0_gpu: " );
 	cudasafe(cudaMemcpy(angle_gpu,lens->ellipticity_angle , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy angle_gpu: " );
 	cudasafe(cudaMemcpy(epot_gpu, lens->ellipticity_potential, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy epot_gpu: " );
 	cudasafe(cudaMemcpy(rcore_gpu, lens->rcore, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy rcore_gpu: " );
 	cudasafe(cudaMemcpy(rcut_gpu, lens->rcut, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy rcut_gpu: " );
 	cudasafe(cudaMemcpy(anglecos_gpu, lens->anglecos, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy anglecos: " );
 	cudasafe(cudaMemcpy(anglesin_gpu, lens->anglesin, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy anglesin: " );
 	cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy fame_gpu: " );
 	//
 	lens_gpu->type 			= type_gpu;
 	lens_gpu->position_x 		= lens_x_gpu;
 	lens_gpu->position_y 		= lens_y_gpu;
 	lens_gpu->b0 			= b0_gpu;
 	lens_gpu->ellipticity_angle 	= angle_gpu;
 	lens_gpu->ellipticity_potential = epot_gpu;
 	lens_gpu->rcore 		= rcore_gpu;
 	lens_gpu->rcut 			= rcut_gpu;
 	lens_gpu->anglecos 		= anglecos_gpu;
 	lens_gpu->anglesin 		= anglesin_gpu;
 	//
 	cudaMemcpy(lens_kernel, lens_gpu, sizeof(Potential_SOA), cudaMemcpyHostToDevice);
 	//
 	type_t time = -myseconds();
 	//
 	module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(grid_grad2_a_gpu, grid_grad2_b_gpu, grid_grad2_c_gpu, grid_grad2_d_gpu, frame_gpu, lens_kernel, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart);
 	//
 	mapfunction(map_gpu,grid_grad2_a_gpu, grid_grad2_b_gpu, grid_grad2_c_gpu, grid_grad2_d_gpu,dl0s,z,mode_amp,nhalos,nbgridcells_x,nbgridcells_y);
 	//amplif_grid_CPU_GPU(map_gpu,grid_grad2_a_gpu, grid_grad2_b_gpu, grid_grad2_c_gpu, grid_grad2_d_gpu,dl0s,z,mode_amp,nhalos,nbgridcells_x,nbgridcells_y);
 	//cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU");
 	cudaDeviceSynchronize();
 
 	//
 
 
 	cudasafe(cudaMemcpy( map, map_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost )," --- Gradient2gpu.cu : Copy source_a_gpu: " );
 	//cudasafe(cudaMemcpy( grid_grad2_b, grid_grad2_b_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost)," --- Gradient2gpu.cu : Copy source_b_gpu: " );
 	//cudasafe(cudaMemcpy( grid_grad2_c, grid_grad2_c_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost )," --- Gradient2gpu.cu : Copy source_c_gpu: " );
 	//cudasafe(cudaMemcpy( grid_grad2_d, grid_grad2_d_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost)," --- Gradient2gpu.cu : Copy source_d_gpu: " );
 	//
 	time += myseconds();
 	std::cout << "	kernel time = " << time << " s." << std::endl;
 	//printf("-----> %f %f \n",grid_grad_x[Nx], grid_grad_y[Ny]);
 	// Free GPU memory
 	cudaFree(lens_gpu);
 	cudaFree(type_gpu);
 	cudaFree(lens_x_gpu);
 	cudaFree(lens_y_gpu);
 	cudaFree(b0_gpu);
 	cudaFree(angle_gpu);
 	cudaFree(epot_gpu);
 	cudaFree(rcore_gpu);
 	cudaFree(rcut_gpu);
 	cudaFree(anglecos_gpu);
 	cudaFree(anglesin_gpu);
 	cudaFree(grid_grad2_a_gpu);
 	cudaFree(grid_grad2_b_gpu);
 	cudaFree(grid_grad2_c_gpu);
 	cudaFree(grid_grad2_d_gpu);
 	cudaFree(map_gpu);
 }
 
 void amplif_grid_CPU_GPU(type_t *map,type_t *grid_grad2_a,type_t *grid_grad2_b,type_t *grid_grad2_c,type_t *grid_grad2_d, type_t dl0s, type_t z,int mode_amp, int nhalos,int nbgridcells_x, int nbgridcells_y)
 {
         int GRID_SIZE_X = (nbgridcells_x + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks
         int GRID_SIZE_Y = (nbgridcells_y + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y;
         //
         //printf("grid_size_x = %d, grid_size_y = %d, nbgridcells_x = %d, nbgridcells_y = %d, istart = %d, jstart = %d (split)\n", GRID_SIZE_X, GRID_SIZE_Y, nbgridcells_x, nbgridcells_y, istart, jstart);
         //
         dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1);
         dim3 grid   (GRID_SIZE_X , GRID_SIZE_Y);
         //
-        int count = nhalos;
-        //printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (double) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(double));
         printf("nhalos = %d, size of shared memory = %lf (split)\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t));
         //
         cudaMemset(map, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t));
         //
         //module_potentialDerivatives_totalGradient_SOA_GPU<<<grid, threads>>> (grid_grad_x, grid_grad_y, lens, frame, nhalos, nbgridcells_x);
         amplif_grid_GPU<<<grid, threads>>> (map,grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d,dl0s,z,nbgridcells_x);
         cudasafe(cudaGetLastError(), "amplif_grid_CPU_GPU");
         //
         cudaDeviceSynchronize();
         printf("GPU kernel done...\n");
 }
 
 __global__
 void
 amplif_grid_GPU(type_t *ampli,type_t *grid_grad2_a,type_t *grid_grad2_b,type_t *grid_grad2_c,type_t *grid_grad2_d, type_t dl0s, type_t z,int nbgridcells)
 {
-    struct point image_point;
-	struct matrix grad, clumpgrad;
 	//
     int col = blockIdx.x*blockDim.x + threadIdx.x;
     int row = blockIdx.y*blockDim.y + threadIdx.y;
     //
     if ((row < nbgridcells) && (col < nbgridcells))
     {
     	int index = row*nbgridcells + col;
         type_t kappa = (grid_grad2_a[index] + grid_grad2_c[index]) / 2.;
         ampli[index] = kappa;
     }
 
 }
 
 #if 0
 void module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart)
 {
         int GRID_SIZE_X = (nbgridcells_x + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks
         int GRID_SIZE_Y = (nbgridcells_y + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y;
         //
         printf("grid_size_x = %d, grid_size_y = %d, nbgridcells_x = %d, nbgridcells_y = %d, istart = %d, jstart = %d (split)\n", GRID_SIZE_X, GRID_SIZE_Y, nbgridcells_x, nbgridcells_y, istart, jstart);
         //
         dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1);
         dim3 grid   (GRID_SIZE_X , GRID_SIZE_Y);
         //
         int count = nhalos;
         //printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (double) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(double));
         printf("nhalos = %d, size of shared memory = %lf (split)\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t));
         //
         cudaMemset(grid_grad2_a, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t));
         cudaMemset(grid_grad2_b, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t));
         cudaMemset(grid_grad2_c, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t));
         cudaMemset(grid_grad2_d, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t));
         //
         //module_potentialDerivatives_totalGradient_SOA_GPU<<<grid, threads>>> (grid_grad_x, grid_grad_y, lens, frame, nhalos, nbgridcells_x);
         module_potentialDerivatives_totalGradient2_SOA_GPU<<<grid, threads>>> (grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d,  lens_gpu, frame, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart);
         cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU");
         //
         cudaDeviceSynchronize();
         printf("GPU kernel done...\n");
 }
 //
 //
 //
 void
 module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos)
 {
 	//
 	int GRID_SIZE_X = (nbgridcells + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks
 	int GRID_SIZE_Y = (nbgridcells + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y; 
 	//
 	type_t* timer = (type_t *) malloc((int) nbgridcells*nbgridcells*sizeof(type_t));
 	type_t* dtimer;
 	//
 	dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1);
 	dim3 grid   (GRID_SIZE_X , GRID_SIZE_Y);	
 	//
 	int count = nhalos;	
 	//printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(type_t));
 	printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t));
 	//
 	cudaMemset(grid_grad2_a, 0, nbgridcells*nbgridcells*sizeof(type_t));
 	cudaMemset(grid_grad2_b, 0, nbgridcells*nbgridcells*sizeof(type_t));
 	cudaMemset(grid_grad2_c, 0, nbgridcells*nbgridcells*sizeof(type_t));
 	cudaMemset(grid_grad2_d, 0, nbgridcells*nbgridcells*sizeof(type_t));
 	//
 	//module_potentialDerivatives_totalGradient2_SOA_GPU<<<grid, threads>>> (grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d, lens_gpu, frame, nbgridcells, nhalos);
 	cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU");
 	//
 	cudaDeviceSynchronize();
 	printf("GPU kernel done...\n");
 }
 #endif
 
diff --git a/src/module_cosmodistances.cpp b/src/module_cosmodistances.cpp
index a5a0843..7b5582c 100644
--- a/src/module_cosmodistances.cpp
+++ b/src/module_cosmodistances.cpp
@@ -1,371 +1,371 @@
 /**
 * @file   module_cosmodistances.cpp
 * @Author Thomas Jalabert, EPFL (me@example.com)
 * @date   July 2015
 * @version 0,1
 * @brief  Library for the computation of cosmological ratios
 *
 * compute the cosmological ratio of the distances between the lens and the source and the lens and the observer
 *
 */
 
 
 
 
 /// include header file
 #include <string>
 #include <stdio.h>
 #include <math.h>
 #include <cstring>
 #include <stdlib.h>
 #include "module_cosmodistances.hpp"
 #include "gsl/gsl_integration.h"
 
 
 
 
 // Declare static functions that will only be used in this module
 // The functions are defined further below
 static type_t module_cosmodistances_cosmo_root(type_t z,cosmo_param C);
 static type_t module_cosmodistances_sk(type_t x, type_t k);
 static type_t module_cosmodistances_chi1(type_t z,cosmo_param C);
 static type_t module_cosmodistances_chi2(type_t z1, type_t z2,cosmo_param C);
 static type_t module_cosmodistances_chiz(type_t z,cosmo_param C);
 static type_t module_cosmodistances_integral_chiz_ab(type_t a, type_t b,cosmo_param C);
 
 void module_cosmodistances_relativecoordinates_XY( type_t *x, type_t *y, int iref, type_t ref_ra, type_t ref_dec )
 {
 	type_t DTR=acos(-1.)/180.;
     // Convert the input values to absolute WCS coordinates
     if ( iref == 1 || iref == 3 )
     {
         *x /= -3600.*cos(ref_dec * DTR);
         *x += ref_ra;
         *y /= 3600.;
         *y += ref_dec;
     }
     else if ( iref == 2 ) // image coordinates
     {
         *x += ref_ra;
         *y += ref_dec;
     }
 }
 
 // Function defintions
 //==========================================================================================================
 
 /** @brief Calculates ratio distance_(lens-source)/distance_(source)
 *   Calculates ratio distance_(lens-source)/distance_(source)
 * @param nsetofimages     number of set of images
 * @param nImagesSet     set of images
 * @param z_lens        redshift of lens
 * @param source        sources
 * @param cosmoratio     variable where result is stored
 * @param cosmopar      cosmological parameter
 */
 void module_cosmodistances_lensSourceToSource( const int nsetofimages, int nImagesSet[], type_t z_lens, galaxy source[], type_t cosmoratio[], cosmo_param cosmopar){
 
 
         //int imageCounter = 0;  // Count the total number of images up to now
   for(int i=0; i<nsetofimages; i++){
     //printf("z_lens %f , imag.redshift %f \n" , z_lens,source[0].redshift);
     cosmoratio[i]=module_cosmodistances_lensSourceToObserverSource(z_lens,source[i].redshift, cosmopar); /// lens efficiency (angular distance)
                 //imageCounter += (nImagesSet[i] - 1);  // We add the number of images to skip to get to the next set
   }
 }
 
 
 
 
 /** @brief Return the angular distance DA(observator(z=0),object(z)) (no unit)
 * Multiply observerObject(z) by c/H0 to get the true value in Mpc.
 * observerObject(z) * c/H0 = DA(0,z) = 1/(1+z) * Sk( integral( 0,z,c*dz/H(z) ) )
 
 * @param z          redshift of object
 * @param cosmopar      cosmological parameter
 */
 type_t module_cosmodistances_observerObject(type_t z, cosmo_param cosmopar)
 
 {
     type_t g;
 
     if (cosmopar.omegaX == 0.)
     {
-    	printf("Omega M HPC %f \n",cosmopar.omegaM );
+    	//printf("Omega M HPC %f \n",cosmopar.omegaM );
         g = module_cosmodistances_cosmo_root(z,cosmopar);
         // Reformulation of the Mattig relation of OL = OK = 0 (De Sitter)
         return(2.*((1. - cosmopar.omegaM - g)*(1. - g)) / cosmopar.omegaM / cosmopar.omegaM / (1. + z) / (1. + z));
     }
     else
     {
         if (cosmopar.curvature != 0.)
             return(module_cosmodistances_sk(module_cosmodistances_chi1(z,cosmopar)*sqrt(fabs(cosmopar.curvature)), cosmopar.curvature)
                    / (1 + z) / sqrt(fabs(cosmopar.curvature)));
         else
             return(module_cosmodistances_chi1(z,cosmopar) / (1 + z));
     }
 }
 
 
 
 
 
 /** @brief Return the angular distance DA(object(z1),object(z2)) divided by c/H0
 * Return the angular distance DA(object(z1),object(z2)) divided by c/H0
 * objectObject(z1,z2) * c/H0 = DA(z1,z2) = 1/(1+z2) * Sk( integral( z1,z2,c*dz/H(z) ) )
 *
 * @param z1          redshift of object 1
 * @param z2          redshift of object 2
 * @param cosmopar      cosmological parameter
 */
 type_t module_cosmodistances_objectObject(type_t z1, type_t z2, cosmo_param cosmopar)
 {
     type_t  g1, g2;
 
     if ( z1 >= z2 )
         return 0.;
 
     if (cosmopar.omegaX == 0.)
     {
         g1 = module_cosmodistances_cosmo_root(z1,cosmopar);
         g2 = module_cosmodistances_cosmo_root(z2,cosmopar);
         // Mattig relation for a De Sitter Universe
         return(2.*((1. - cosmopar.omegaM - g1*g2)*(g1 - g2))
                / cosmopar.omegaM / cosmopar.omegaM / (1. + z1) / (1. + z2) / (1. + z2));
     }
     else
     {
         if ( cosmopar.curvature != 0. )
             return(module_cosmodistances_sk(module_cosmodistances_chi2(z1, z2,cosmopar)*sqrt(fabs(cosmopar.curvature)), cosmopar.curvature)
                    / (1 + z2) / sqrt(fabs(cosmopar.curvature)));
         else
             return(module_cosmodistances_chi2(z1, z2,cosmopar) / (1 + z2));
     }
 }
 
 
 
 
 /****************************************************************/
 
  /** @brief Return the lens efficacity E=DA(LS) / DA(OS)
 * If zl > zs, return 0.
 *
 * @param zl          redshift lens
 * @param zs          redshift source
 * @param cosmopar      cosmological parameter
 */
 
 type_t module_cosmodistances_lensSourceToObserverSource(type_t zl, type_t zs, cosmo_param cosmopar)
 {
     if ( zl >= zs ){
   printf("*******************\nWarning, a source is between the lens and the observer\n*******************\n");
   printf("Zl: %f , ZS: %f \n", zl, zs);
         return (0.);
     }
         return (module_cosmodistances_objectObject(zl, zs, cosmopar) / module_cosmodistances_observerObject(zs, cosmopar));
 }
 
 
 
  /** @brief Calculate square root
 *
 * @param z      redshift
 * @param C      cosmological parameter
 */
 
 // Calculate square root
 static type_t module_cosmodistances_cosmo_root(type_t z,cosmo_param C)
 {
     return(sqrt(1. + C.omegaM*z));
 }
 
 
 
 
 // Calculate S_k for cosmology
  /** @brief Calculate S_k for cosmology
 *
 * @param x
 * @param k      curvature
 */
 static type_t module_cosmodistances_sk(type_t x, type_t k)
 {
     if (k > 0)
         return(sin(x));
     else if (k < 0)
         return(sinh(x));
     else
         return(x);
 }
 
 
  /** @brief Return 1 / H(z) multiplied by H0
 * chiz(z) / H0 = 1 / H(z) = 1/H0/(1+z)/sqrt( sum(i, Omega_i*(1+z)^(3*(w_i + 1) ) )
 *
 * @param z      redshift
 * @param C      cosmological parameter
 */
 
 static type_t module_cosmodistances_chiz(type_t z,cosmo_param C)
 {
     type_t x;
     type_t yy, yyy, y4;
     type_t r0, e1, e2, frac;
 
     x = 1 + z;
 
 
     switch (C.model)       //TV  CPL Model
     {
         case(1):
             x = -x * x * C.curvature + x * x * x * C.omegaM + C.omegaX * pow(x, 3 * (1 + C.wX + C.wa)) * exp(-3 * C.wa * z / x);
             break;
         case(2):        //TV Cardassian (wx is q, wa is n)
             yy = pow ( (1.+C.curvature)/C.omegaM  ,C.wX);
             yyy = (yy - 1.) * pow( x,3.*C.wX*(C.wa-1.) );
             y4 = pow(1.+yyy,1./C.wX);
             x = -x * x * C.curvature + x * x * x * C.omegaM*y4;
             break;
         case(3):        //TV Interacting DE Model (wa is delta)
             yy = C.omegaX*pow( x,3.*(1.+C.wX) );
             yyy = ( C.omegaM/(C.wa+3.*C.wX) ) * (  C.wa*pow(x,3.*(1.+C.wX))   +  3.*C.wX*pow(x,(3.-C.wa))   );
             x = -x * x * C.curvature + yy +yyy;
             break;
         case(4):        //TV Holographic Ricci Scale with CPL
             r0 = C.omegaM/(1.-C.omegaM);
             e1 =  (3./2.)*(  (1.+r0+C.wX+4*C.wa)/(1.+r0+3.*C.wa) );
             e2 =  (-1./2.)*(  (1.+r0-3.*C.wX)/(1.+r0+3.*C.wa) )  ;
             frac =  ( 1.+r0+3.*C.wa*(x-1.)/x )/(1.+r0)  ;
             yy = pow(x,e1);
             yyy = pow(frac,e2);
             x = (yy*yyy)*(yy*yyy);
             break;
         default:
             printf("ERROR: Unknown cosmological model %d\n", C.model);
             exit(-1);
 
     }
 
     if ( x <= 0 )
     {
         printf("ERROR : H^2(z)<=0 produced (z,omegaM,omegaX,wX,wa) = (%.3lf,%.3lf,%.3lf,%.3lf,%.3lf)\n",
                  z, C.omegaM, C.omegaX, C.wX, C.wa );
          exit(-1);
     }
     return( 1. / sqrt(x) );
 }
 
 
 
  /** @brief Return the proper distance Dp(0,z) divided by c/H0
 * chi1(z) * c/H0 = Dp(0,z) = integral( 0, z, c*dz / H(z) )
 *
 * @param z      redshift
 * @param C      cosmological parameter
 */
 static type_t module_cosmodistances_chi1(type_t z,cosmo_param C)
 {
     type_t rez;
     rez = module_cosmodistances_integral_chiz_ab(0., z,C);
     return rez;
 }
 
 
 
  /** @brief Return the proper distance Dp(z1,z2) divided by c/H0
 * chi2(z) * c/H0 = Dp(z1,z2) = integral( z1, z2, c*dz / H(z) )
 *
 * @param zl      redshift lens
 * @param zs      redshift source
 * @param C      cosmological parameter
 */
 static type_t module_cosmodistances_chi2(type_t z1, type_t z2,cosmo_param C)
 {
    type_t rez;
    rez = module_cosmodistances_integral_chiz_ab(z1, z2,C);
 
    return rez;
 }
 
 
  /** @brief compute the integral of H0/H(z) by trapezes method
 * compute the integral of H0/H(z) by trapezes method
 * @param a,b, cosmologicalparameters cosmopar
 *
 * @param a
 * @param b
 * @param C    cosmological parameter
 */
 static double chiz_gsl(double z, void* param)
 {
 	struct cosmo_param * cosmo  = (struct cosmo_param *)param;
    return module_cosmodistances_chiz(z,*cosmo);
 }
 
 static type_t module_cosmodistances_integral_chiz_ab(type_t a, type_t b,cosmo_param C)
 {
 /*
    int i,nit;
    type_t res, epsilon=1.e-5; /// accuracy of the integration, slows down cosmo calculation hugely, investigate lenstool gsl (need e-6)?
 
    nit=(b-a)/epsilon;
    res=epsilon*(module_cosmodistances_chiz(a,C)+module_cosmodistances_chiz(b,C))/2.;
 
    for(i=1;i<nit;i++)
    {
        res=res+epsilon*module_cosmodistances_chiz(a+i*epsilon,C);
    }
 
    return res;*/
 
    gsl_function f;
    f.function = &chiz_gsl;
    f.params = &C;
    const int limit = 10;
    gsl_integration_workspace * w1 = gsl_integration_workspace_alloc(limit);
    double rez, err;
    gsl_integration_qag(&f, a, b, 0, 1e-6, limit, GSL_INTEG_GAUSS15, w1 , &rez ,&err);
    gsl_integration_workspace_free(w1);
    return rez;
 }
 
 
 
 
 
 
  /** @brief Debug function to print the calculated output of the functions
   * Debug function to print the calculated output of the functions
 */
 
 // Debug function to print the calculated output of the functions
 
 int module_cosmodistances_debug(int runmode[], type_t strongLensingRatios_lensSourceToSource[], type_t weakLensingRatios_lensSourceToSource[], type_t weakLensing_observerSource[], int numberCleanLens, type_t cleanlensRatios_lensSourceToSource[], type_t cleanlens_observerSource[], std::string DEBUG)
 {
 if (strcasecmp(DEBUG.c_str(), "True") == 0)  // If we are in debug mode
 {
 
   if (runmode[0] == 1 or runmode[1] == 1)  // If we have strong lensing
   {
       printf("DEBUG: Strong lensing cosmo ratios D_LS/D_OS for first 2 sets: %lf, %lf\n\n", strongLensingRatios_lensSourceToSource[0], strongLensingRatios_lensSourceToSource[1]);
   };
   if (runmode[2] == 1)  // We have weak lensing
   {
       printf("DEBUG: Weak lensing D_LS/D_OS for first 2 arclets: %lf, %lf. Weak lensing D_OS for first 2 arclets: %lf, %lf.\n\n", weakLensingRatios_lensSourceToSource[0], weakLensingRatios_lensSourceToSource[1], weakLensing_observerSource[0], weakLensing_observerSource[1]);
   };
   if (numberCleanLens > 0)  // We have cleanlens mode
   {
       printf("DEBUG: Cleanlens D_LS/D_OS for first 2 sources: %lf, %lf. Cleanlens D_OS for first 2 sources: %lf, %lf.\n\n", cleanlensRatios_lensSourceToSource[0], cleanlensRatios_lensSourceToSource[1], cleanlens_observerSource[0], cleanlens_observerSource[1]);
   };
 
 
 };
 
 return 0;
 }
 
 
 
 
 
diff --git a/src/module_readParameters.cpp b/src/module_readParameters.cpp
index b40f609..ffce15a 100644
--- a/src/module_readParameters.cpp
+++ b/src/module_readParameters.cpp
@@ -1,2744 +1,2744 @@
 /**
 * @file   module_readParameters.cpp
 * @Author Christoph Schaefer, EPFL (christophernstrerne.schaefer@epfl.ch)
 * @date   July 2015
 * @version 0,1
 * @brief  read parameters, images, sources and any other relevant information
 *
 * read parameter file
 * 
 */
 
 
 
 // Include
 //===========================================================================================================
 #include <iostream>
 #include <cstring>
 #include <string.h>
 #include <sstream>
 #include <stdlib.h>
 #include <stdio.h>
 #include <fstream>
 #include <cmath>
 #include "module_readParameters.hpp"
 #include <iomanip>
 
 
 
 
 
 //===========================================================================================================
 // Function definitions
 
 /**
  *
  * bayes.dat each # corresponds to one param available, it has to be in the same folder (can be changed)
  *
  * bayespot = array with bayes values
  * nparam = number of param available
  * nvalues = number of bayes potentials
  *
  */
 
 void module_readParameters_preparebayes(int &nparam, int &nvalues){
 
 	nparam = 0;
 	nvalues = 0;
 	//Counting lines
     std::string line;
     std::ifstream IM("bayes.dat",std::ios::in);
 	if ( IM )
 	{
 		while( std::getline(IM,line) )  // Read every line
 		{
 		if ( strncmp(line.c_str(), "#", 1) == 0){	//Skipping commented lines
 			nparam += 1;
 			continue;
 		}
 		nvalues +=1;
 		}
 	}
 	nvalues -= 1; //exclude last empty line
 	//nparam -=1; // Nparam is excluded
-	std::cerr << "nvalues :" << nvalues << "nparam :" << nparam << std::endl;
+	//std::cerr << "nvalues :" << nvalues << "nparam :" << nparam << std::endl;
 	IM.close();
 
 }
 
 void module_readParameters_bayesmodels(double * bayespot, int nparam, int nvalues){
 
     std::string line;
     std::ifstream IM("bayes.dat",std::ios::in);
     std::stringstream streamline;
     int j = 0;
 	if ( IM )
 	{
 		while( std::getline(IM,line) )  // Read every line
 		{
 		if ( strncmp(line.c_str(), "#", 1) == 0){	//Skipping commented lines
 			continue;
 		}
 		streamline << line;
 		for(int i = 0; i < nparam; i++){
 			streamline >> bayespot[j * nparam + i];
 			//IM >> bayespot[j * nparam + i];
 			std::cerr << bayespot[j * nparam + i] << " " ;
 		}
 		streamline.clear();
 		std::cerr << std::endl;
 		j += 1;
 		}
 
 	}
 
 	IM.close();
 
 
 }
 
 /**setting potential using bayes[index] values. Does not support changing the bayes files config output defined by
 // Parameter constants in structure.h
 #define CX       0
 #define CY       1
 #define EPOT     2
 #define EMASS    3
 #define THETA    4
 #define PHI      5
 #define RC       6
 #define B0       7
 #define ALPHA    8
 #define BETA     9
 #define RCUT    10
 #define MASSE   11
 #define ZLENS   12
 #define RCSLOPE 13
 #define PMASS   14
 */
 
 void module_readParameters_setbayesmapmodels(const runmode_param* runmode, const cosmo_param* cosmology, const potentialoptimization* limit, potfile_param* potfile, Potential_SOA* lenses, double * bayespot, int nparam, int index){
 
 	int param_index = 2;
 	double DTR=acos(-1.)/180.;	/* 1 deg in rad  = pi/180 */
 	for(int i = 0; i < runmode->nhalos; i++){
 		if(limit[i].position.x.block == 1){
 			lenses->position_x[i] = bayespot[index*nparam+param_index];
 			param_index++;
 		}
 		if(limit[i].position.y.block == 1){
 			lenses->position_y[i] = bayespot[index*nparam+param_index];
 			param_index++;
 		}
 		if(limit[i].ellipticity_potential.block == 1){
 			lenses->ellipticity_potential[i] = bayespot[index*nparam+param_index];
 			param_index++;
 		}
 		if(limit[i].ellipticity.block == 1){
 			lenses->ellipticity[i] = bayespot[index*nparam+param_index];
 			param_index++;
 		}
 		if(limit[i].ellipticity_angle.block == 1){
 			lenses->ellipticity_angle[i] = bayespot[index*nparam+param_index]* DTR;
 			lenses->anglecos[i] = cos(lenses->ellipticity_angle[i]);
 			lenses->anglesin[i] = sin(lenses->ellipticity_angle[i]);
 			param_index++;
 		}
 		if(limit[i].rcore.block == 1){
 			lenses->rcore[i] = bayespot[index*nparam+param_index];
 			param_index++;
 		}
 		if(limit[i].vdisp.block == 1){
 			lenses->vdisp[i] = bayespot[index*nparam+param_index];
 			param_index++;
 		}
 		if(limit[i].rcut.block == 1){
 			lenses->rcut[i] = bayespot[index*nparam+param_index];
 			param_index++;
 		}
 		if(limit[i].z.block == 1){
 			lenses->z[i] = bayespot[index*nparam+param_index];
 			param_index++;
 		}
 		module_readParameters_calculatePotentialparameter_SOA(lenses, i);
 	}
 	//Skip redshift image optimisation
 	if(runmode->potfile != 0){
 	param_index += runmode->N_z_param;
-	std::cerr << "N_z_param " << runmode->N_z_param << std::endl;
-	printf("DNDBSFB ircut %d\n",potfile->ircut);
+	//std::cerr << "N_z_param " << runmode->N_z_param << std::endl;
+	//printf("DNDBSFB ircut %d\n",potfile->ircut);
 	//Start potfile updating
 	if(potfile->ircut > 0){
 		potfile->cut1 = bayespot[index*nparam+param_index];
 		//printf("cur %f ircut %d\n",potfile->cut,potfile->ircut);
 		//std::cerr << index*nparam+param_index_pot << " "<< bayespot[index*nparam+param_index_pot] << std::endl;
 		param_index++;
 	}
 	//std::cerr << "param " << param_index_pot << std::endl;
 	if(potfile->isigma > 0){
 		potfile->sigma1 = bayespot[index*nparam+param_index];
 		//std::cerr << index*nparam+param_index_pot << " "<< bayespot[index*nparam+param_index_pot] << std::endl;
 		param_index++;
 	}
 
 	setScalingRelations(runmode,cosmology,potfile,lenses);
 	}
 	/*
 	for(int i = runmode->nhalos; i < runmode->nhalos + runmode->npotfile; i++){
 		param_index_pot = param_index;
 		//std::cerr << "param " << param_index_pot << std::endl;
 
 		//std::cerr << "param " << param_index_pot << std::endl;
 		module_readParameters_calculatePotentialparameter_SOA(lenses, i);
 	}*/
 
 		//update potential parameters
 
 }
 
 /** @brief This module function reads the cosmology parameters from the parameter file
 * 
 * This module function reads the cosmology parameters from the parameter file. The given pointer is 
 * updated with the results.
 * Read an infile: The structure of the file is as follows:
 	|    .
 	|    .
 	|    .
 	|cosmology
 	|		model	x <---- this is a value
 	|		 H0	x
 	|		  .	.
 	|		  .	.
 	|		 end
 	|    .
 	|    .
 	|  finish
 * 	
 * @param Cosmology cosmology
 * @param infile path to file	 
 */
 
 void module_readParameters_readCosmology(std::string infile, cosmo_param &Cosmology)
 {
 
     std::string first, second, third, line1, line2;
 
     Cosmology.model = 1;
     Cosmology.H0 = 50;
     Cosmology.h = 1.;
     Cosmology.omegaM = 1.;
     Cosmology.omegaX = 0;
     Cosmology.wX = -1.;
     Cosmology.wa = 0.;
     Cosmology.curvature = 0.;
 
 
     std::ifstream IN(infile.c_str(),std::ios::in); // open file
     if ( IN )
     {
 	std::getline(IN,line1);
 	std::istringstream read1(line1); // create a stream for the line
 	read1 >> first;
 	while(strncmp(first.c_str(), "fini",4) != 0 ) // read line by line until finish is reached
         {
             if ( strncmp(first.c_str(), "cosmolog", 8) == 0){ // if the line contains information about cosmology
 
 		    std::getline(IN,line2);    // read the line word by word
 		    std::istringstream read2(line2);
 		    read2 >> second >> third;
 
 		    while(strncmp(second.c_str(), "end",3) != 0)    // Go ahead until "end" is reached
         	    { 
 
         		if ( !strcmp(second.c_str(), "model") )  // set model of universe
         		{                                                          
             		Cosmology.model=atoi(third.c_str());
         		}
        			else if ( !strcmp(second.c_str(), "H0") ) // set Hubble constant
         		{
             		Cosmology.H0=atof(third.c_str());
             		Cosmology.h = Cosmology.H0 / 50.;
         		}
         		else if ( !strcmp(second.c_str(), "omegaM") || !strcmp(second.c_str(), "omega") ) // set density of matter
         		{
            		Cosmology.omegaM=atof(third.c_str());
         		}
         		else if ( !strcmp(second.c_str(), "omegaX") || !strcmp(second.c_str(), "lambda") ) // set cosmological constant
         		{
             		Cosmology.omegaX=atof(third.c_str());
         		}
         		else if ( !strcmp(second.c_str(), "wX") || !strcmp(second.c_str(), "q") || !strcmp(second.c_str(), "w0") )     // set  "q" for Model 2, "wX" for Model 3, "w0" for Model 4
         		{
             		Cosmology.wX=atof(third.c_str());
         		}
         		else if ( !strcmp(second.c_str(), "wa") || !strcmp(second.c_str(), "n") || !strcmp(second.c_str(), "delta") || !strcmp(second.c_str(), "w1") )     // set  "n" for Model 2, "delta" for model 3, "w1" for model 4  
         		{
             		Cosmology.wa=atof(third.c_str());
         		}
         		else if ( !strcmp(second.c_str(), "omegaK") ) // set universe curvature
         		{
             		Cosmology.curvature=atof(third.c_str());
         		}
 			// read next line
 			std::getline(IN,line2);
 		        std::istringstream read2(line2);
 		        read2 >> second >> third;
 
     		   }
 
     		// if a flat Universe
     		if ( Cosmology.curvature == 0. )
     		{
         	Cosmology.omegaX = 1 - Cosmology.omegaM;
     		}
     		else
         	Cosmology.curvature = Cosmology.omegaM + Cosmology.omegaX - 1;
 		
 	    }
 	    // read next line
 	    std::getline(IN,line1);
 	    std::istringstream read1(line1);
 	    read1 >> first;
 
         }
 
         IN.close();
 
     }
     else
     {
         fprintf(stderr, "ERROR: file %s not found\n", infile.c_str());  // Exit if file was not found
         exit(-1);
     }
 
 }
 
 /** @brief This module function reads the number of sources, arclets, etc. in the parameter file. We need to know this to allocate the memory
 * 
 * Function to read the number of multiple images and clumps
 * Check if there is 1 limit for each clump set
 * The program reads the number of potentials and limits defined, and checks whether there are the same number
 * Then it opens the imfile to read the numbers of images and set of images
 * Reads also the size of the multiple images area, the size of a pixel, and the runmode.
 * 	
 * @param infile path to file
 * @param runmode runmode parameter
 */
 
 void read_runmode(std::istream &IN, struct runmode_param *runmode){
 	std::string  first, second, third, fourth, fifth, line1, line2;
 	//sscanf variables
 	double in1, in2;	//%lf in1, then (type_t)in1 ;
 	std::getline(IN,line2);
 					std::istringstream read2(line2);
 					read2 >> second >> third >> fourth;    // Read in 4 words
 		    		while (strncmp(second.c_str(), "end", 3))    // Read until we reach end
 		    		{
 						if ( !strcmp(second.c_str(), "nbgridcells") )
 		        		{
 							sscanf(line2.c_str(), " %*s %d ", &runmode->nbgridcells);
 		        		}
 
 		        		if ( !strcmp(second.c_str(), "source") )
 		        		{
 							char filename[FILENAME_SIZE];
 		            		sscanf(line2.c_str(), " %*s %d %s ", &runmode->source, &filename);
 		            		runmode->sourfile = filename;
 		        		}
 
 						if ( !strcmp(second.c_str(), "image") )
 		        		{
 							char filename[FILENAME_SIZE];
 		            		sscanf(line2.c_str(), " %*s %d %s ", &runmode->image, &filename);
 		            		runmode->imagefile = filename;
 		        		}
 		        		if ( !strcmp(second.c_str(), "inverse") )
 		        		{
 							sscanf(line2.c_str(), " %*s %d ", &runmode->inverse);
 		        		}
 		        		if ( !strcmp(second.c_str(), "mass") )
 		        		{
 							sscanf(line2.c_str(), " %*s %d %d %lf %lf", &runmode->mass, &runmode->mass_gridcells, &in1, &in2);
 							runmode->z_mass = (type_t)in1;
 							runmode->z_mass_s =(type_t)in2;
 		        		}
 		        		if ( !strcmp(second.c_str(), "poten") )
 		        		{
 							sscanf(line2.c_str(), " %*s %d %d %lf", &runmode->potential, &runmode->pot_gridcells, &in1);
 							runmode->z_pot = (type_t)in1;
 		        		}
 		        		if ( !strcmp(second.c_str(), "dpl") )
 		        		{
 							sscanf(line2.c_str(), " %*s %d %d %lf", &runmode->dpl, &runmode->dpl_gridcells, &in1);
 							runmode->z_dpl = (type_t)in1;
 		        		}
 		        		if ( !strcmp(second.c_str(), "grid") )
 		        		{
 							sscanf(line2.c_str(), " %*s %d %d %lf", &runmode->grid, &runmode->gridcells, &in1);
 							runmode->zgrid = (type_t)in1;
 		        		}
 		        		if ( !strcmp(second.c_str(), "ampli") )
 		        		{
 							sscanf(line2.c_str(), " %*s %d %d %lf", &runmode->amplif, &runmode->amplif_gridcells, &in1);
 							runmode->z_amplif = (type_t)in1;
 							//std::cerr<<runmode_>ampli << << <<std::endl;
 									        		}
 						if ( !strcmp(second.c_str(), "arclets") )
 		        		{
 		            		runmode->arclet = 1;  // Not supported yet
 		        		}
 				        if (!strcmp(second.c_str(), "reference"))
 				        {
 				        	sscanf(line2.c_str(), "%*s %*d %lf %lf", &in1, &in2);
 				                runmode->ref_ra = (type_t)in1;
 				                runmode->ref_dec =(type_t)in2;
 
 				            //std::cerr << line2 << std::endl;
 				            std::cout << "Reference: Ra " << runmode->ref_ra << " Dec:" << runmode->ref_dec <<std::endl;
 				        }
 
 						if ( !strcmp(second.c_str(), "time") )
 		        		{
 							sscanf(line2.c_str(), " %*s %d ", &runmode->time);
 
 		        		}
 						if( !strncmp(second.c_str(), "Debug",5) )    // Read in if we are in debug mode
 						{
 						runmode->debug = 1;
 						}
 
 						// Read the next line
 						std::getline(IN,line2);
 						std::istringstream read2(line2);
 						read2 >> second >> third;
 		    		}
 
 }
 
 void read_runmode_potential(std::istream &IN, int &numberPotentials){
 	std::string  first, second, third, fourth, fifth, line1, line2;
 
 	numberPotentials += 1;
 	std::getline(IN,line2);
 	std::istringstream read2(line2);
 	double z(0);
 	read2 >> second >> third;
 	//std::cout << second  << third << std::endl;
 	while (strncmp(second.c_str(), "end", 3))    // Read until we reach end
 	{
 		if (!strcmp(second.c_str(), "z_lens"))  // Get redshift
 		{
 			z=atof(third.c_str());
 		}
 		// Read the next line
 		std::getline(IN,line2);
 		std::istringstream read2(line2);
 		read2 >> second >> third;
 	}
 	// Check if redshift from current halo was initialized
 	if ( z == 0. )
 		{
 			fprintf(stderr, "ERROR: A redshift is not defined for a potential \n");
 			exit(-1);
 		}
 }
 
 void read_runmode_image(std::istream &IN, struct runmode_param *runmode){
 	std::string  first, second, third, fourth, fifth, line1, line2;
 
     std::getline(IN,line2);
  	std::istringstream read2(line2);
  	double z(0);
  	read2 >> second >> third;
  	while (strncmp(second.c_str(), "end", 3))    // Read until we reach end
 		{
 			if ( !strcmp(second.c_str(), "multfile") )
 			{
 				char filename[FILENAME_SIZE];
 				sscanf(line2.c_str(), " %*s %d %s ", &runmode->multi, &filename);
 				runmode->imagefile = filename;
 			}
 			if ( !strcmp(second.c_str(), "z_m_limit") )
 			{
 				runmode->N_z_param += 1 ;
 			}
 			//std::cout << runmode->multi << runmode->imagefile << std::endl;
 			// Read the next line
 			std::getline(IN,line2);
 			std::istringstream read2(line2);
 			read2 >> second >> third;
 		}
 }
 
 void read_runmode_potfile(std::istream &IN, struct runmode_param *runmode){
 	std::string  first, second, third, fourth, fifth, line1, line2;
 
     runmode->potfile = 1;
 
     std::getline(IN,line2);
 	std::istringstream read2(line2);
 	read2 >> second >> third >> fourth;    // Read in 4 words
 	while (strncmp(second.c_str(), "end", 3))    // Read until we reach end
 	{
 
 	if ( !strcmp(second.c_str(), "filein") )
 	{
 		//std::cerr<< third << " " << fourth << " " << runmode->potfilename << std::endl;
 		runmode->potfilename = fourth;
 		//std::cerr<< line2 << runmode->potfilename;
 		break;
 
 	}
 	// Read the next line
 	std::getline(IN,line2);
 	std::istringstream read2(line2);
 	read2 >> second >> third;
 	}
 }
 
 void read_runmode_countimages(struct runmode_param *runmode){
 	std::string line2;
 	int old_j = 0;
 	int j = 0;
 	int imageIndex = 0;
 
 	if (runmode->image == 1 or runmode->inverse == 1 or runmode->time >= 1 or runmode->multi >= 1){
 
 		std::string imageFile_name = runmode->imagefile;
 		std::ifstream IM(imageFile_name.c_str(), std::ios::in);
 
 		if ( IM )
 		{
 			while( std::getline(IM,line2) )  // Read every line
 			{
 			if ( strncmp(line2.c_str(), "#", 1) == 0){	//Skipping commented lines
 				continue;
 			}
 			else{
 				int j=atoi(line2.c_str());
 				if(j != old_j){				//If a new set has been reached, increase the nset iterator and update old j
 					runmode->nsets +=1;
 					old_j = j;
 				}
 				imageIndex++;
 				}
 			}
 		}
 		else{
 
 			fprintf(stderr, "ERROR: file %s not found\n", imageFile_name.c_str());
 			exit(-1);
 		}
 
 	runmode->nimagestot=imageIndex;
 	IM.close();
 	}
 }
 
 void read_runmode_countsources(struct runmode_param *runmode){
 	std::string  imageFile_name,line1, line2;
 
 	if (runmode->source == 1 and runmode->image == 0 and runmode->multi == 0 ){
 		imageFile_name = runmode->sourfile;
 		//printf("Source to image mode activated\n");
 
 	std::ifstream IM(imageFile_name.c_str(), std::ios::in);
 	//printf(" Booo says hello again \n");
     	if ( IM ){
 			int i = 0;
 	        while( std::getline(IM,line1) ){    // Read until we reach the end
 	            i++;
 			}
 			runmode->nsets = i ;
 	    	}
 		else{
 
 			fprintf(stderr, "ERROR: file %s not found\n", imageFile_name.c_str());
 			exit(-1);
 		}
 
 	runmode->nimagestot= 0;	// Important
 	IM.close();
 
 	}
 }
 
 void read_runmode_countpotfile(struct runmode_param *runmode){
 	std::string  imageFile_name,line1, line2;
 
 	if (runmode->potfile == 1){
 			imageFile_name = runmode->potfilename;
 
 			std::ifstream IM(imageFile_name.c_str(), std::ios::in);
 
 	    	if ( IM ){
 				int i = 0;
 		        while( std::getline(IM,line1) ){    // Read until we reach the end
 	                if ( line1[0] == '#' ){
 	                    continue;
 	                }
 		            i++;
 				}
 				runmode->npotfile = i ;
 		    	}
 			else{
 
 				fprintf(stderr, "ERROR: file %s not found\n", imageFile_name.c_str());
 				exit(-1);
 			}
 
 		IM.close();
 
 		}
 }
 
 
 void module_readParameters_readRunmode(std::string infile, struct runmode_param *runmode)
 {
 
 	std::string  first, second, third, fourth, fifth, line1, line2;
 	int Nsis(0), Npiemd(0);
 	/// Set default values
 
 	runmode->nbgridcells = 1000;
 	runmode->source = 0;
 	runmode->image = 0;
 	runmode->N_z_param = 0;
 	runmode->nsets = 0;
 	runmode->nhalos = 0;
 	runmode->multi = 0;
 	runmode->mass = 0;
 	runmode->mass_gridcells = 1000;
 	runmode->z_mass = 0.4;
 	runmode->z_mass_s = 0.8;
 	runmode->potential = 0;
 	runmode->pot_gridcells = 1000;
 	runmode->potfile = 0; //weird seg fault due to this line, haven't figured out why
 	runmode->npotfile = 0;
 	runmode->z_pot = 0.8;
 	runmode->dpl = 0;
 	runmode->dpl_gridcells = 1000;
 	runmode->z_dpl = 0.8;
 	runmode->inverse = 0;
 	runmode->arclet = 0;
 	runmode->debug = 0;
 	runmode->nimagestot = 0;
 	runmode->nsets = 0;
 	runmode->gridcells = 1000;
 	//std::cerr << sizeof(*runmode) << std::endl;
 	runmode->cline = 0;
 	runmode->time = 0;
 	runmode->inverse = 0;
 	runmode->arclet = 0;
 	runmode->ref_ra = 0;
 	runmode->ref_dec = 0;
 
 
 	int j=0;
 	std::string imageFile_name;
 	int imageIndex=0;
 	int numberPotentials=0, numberLimits=0;
 
 /*************************** read nhalos, imfile_name, pixel_size, multipleimagesarea_size and runmode from configuration file *********************/
 
 std::ifstream IN(infile.c_str(), std::ios::in);
     if ( IN )
     {
 
 	std::getline(IN,line1);
 	std::istringstream read1(line1); // create a stream for the line
 	read1 >> first;  // Read the first word
 	//std::cout<<first;
         while(  strncmp(first.c_str(), "fini",4) != 0  )    // Continue until we reach finish
         {
 			if ( strncmp(first.c_str(), "runmode", 7) == 0){    // Read in runmode information
 				read_runmode(IN,runmode);
 		    }
 			else if (!strncmp(first.c_str(), "potent", 6)) // each time we find a "potential" string in the configuration file, we add an halo
             {
 				read_runmode_potential(IN,numberPotentials);
 				//std::cerr<<numberPotentials << std::endl;
             }
             else if (!strncmp(first.c_str(), "image", 5)) // same for the limit of the potential
             {
             	read_runmode_image(IN,runmode);
             }
             else if (!strncmp(first.c_str(), "limit", 5)) // same for the limit of the potential
             {
                 numberLimits++;
             }
             else if (!strncmp(first.c_str(), "cline", 5))
             {
                 runmode->cline = 1;
             }
 
             else if (!strncmp(first.c_str(), "potfile", 7))
             {
             	read_runmode_potfile(IN,runmode);
             }
 	    // read the next line
 	    std::getline(IN,line1);
 	    std::istringstream read1(line1);
 	    read1 >> first;
 	    //std::cout<<first;
         }
 
         IN.close();
 	
         //if(numberLimits!=numberPotentials) printf("Warning : Number of clumps different than number of limits in %s\n", infile.c_str()); // must be limits for each halo
 	runmode->nhalos=numberPotentials;
 
     }
     else
     {
         fprintf(stderr, "ERROR: file %s not found\n", infile.c_str());
         exit(-1);
     }
     //
     //getting nimage value (number of images), nset value (number of sources) and npotfile
     //if image or multi mode is activated get nimage and nset
     read_runmode_countimages(runmode);
 	//if source mode is activated, get nset
     read_runmode_countsources(runmode);
     //if potfile mode, count number of potential in potfile
     read_runmode_countpotfile(runmode);
 
 
 std::cerr <<"nsets: " <<runmode->nsets <<" nhalos: " << runmode->nhalos << " nimagestot: " << runmode->nimagestot << " npotfile: " << runmode->npotfile << " multi: " << runmode->multi<< std::endl;
 
 }
 
 
 
 
 /** @brief read the positions, redshifts and numbers of multiple images from the images file
 * 
 * This module function reads in the strong lensing images
 * Output: image coordinates, image shapes (semi-minor, semi-major of ellipse and orientation angle), source redshifts, number of images per set
 * 	
 ** the images file must contain
 	1 	x_center	y_center	major axis	minor axis	ellipticity angle	redshift
 	1	   .		   .		   .		    .			.		    .
 	1	   .		   .		   .		    .			.		    .
 	2	   .		   .		   .		    .			.		    .
 	2	   .		   .		   .		    .			.		    .
 	2	   .		   .		   .		    .			.		    .
 	2	   .		   .		   .		    .			.		    .
 	2	   .		   .		   .		    .			.		    .
 	.	   .		   .		   .		    .			.		    .
 	.	   .		   .		   .		    .			.		    .
 	.	   .		   .		   .		    .			.		    .
    nsetofimages	   .		   .		   .		    .			.		    .
 
    At each line we store in j the index of the set of images to store the redshifts and the number of images of each set
    At each line we add an images in the position of images' array
 * 
 * @param runmode runmode parameter
 * @param image	array where images will be stored
 * @param nImagesSet array where the number of images per Set will be stored
 */
 
 
 /// read the positions, redshifts and numbers of multiple images from the images file
 void module_readParameters_readImages(const struct runmode_param *runmode, struct galaxy image[], int nImagesSet[])
 {
 
 	std::string second, line1;
 	int imageIndex=0;
 	int j=0;
 	//cast variables
 	double cast_x,cast_y,cast_a,cast_b,cast_theta,cast_z,cast_mag;
 	
 
 
 
 for(int i=0; i<runmode->nsets; i++){
 	nImagesSet[i]=0;
 }
 
 /*********initialisation of nimages array*********/
 for(int i=0; i<runmode->nimagestot; i++){
 	
 	/* Initialise here the variables of the images*/
 	image[i].center.x = image[i].center.y = 0;
 	image[i].shape.a = image[i].shape.b = image[i].shape.theta = 0;
 	image[i].redshift = 0; 
 
     }
 
 //printf("imagefile :%d \n", runmode->imagefile.c_str
 
 // Read in images
     std::ifstream IM(runmode->imagefile.c_str(),std::ios::in);  
 	if ( IM )
     	{
 			int i = 0;
         	while( std::getline(IM,line1) )    // Read until we reach the end
         	{
                         // Read in the parameters, * means we read in a parameter but do not store it
             
 			sscanf(line1.c_str(), "%d %lf %lf %lf %lf %lf %lf %lf", &j, &cast_x, &cast_y, &cast_a, &cast_b, &cast_theta, &cast_z, &cast_mag);
 			//Casting
 			image[i].center.x =(type_t)cast_x;
 			image[i].center.y =(type_t)cast_y;
 			image[i].shape.a =(type_t)cast_a;
 			image[i].shape.b =(type_t)cast_b;
 			image[i].shape.theta =(type_t)cast_theta;
 			image[i].redshift =(type_t)cast_z;
 			image[i].mag =(type_t)cast_mag;
 			//Variables
 			nImagesSet[j-1]++;  // Increase the counter of the number of system for system with number j-1 by 1
             imageIndex++;
             i++;
 		}
 	}
 	else{
 			std::cout << "Error : file "  << runmode->imagefile <<  " not found" << std::endl;
 			exit(-1);
 		}
 	
 	IM.close();
 }
 
 /** @brief read the positions, redshifts and numbers of multiple sources from the sources file
 * 
 * This module function reads in the strong lensing sources
 * Output: sources coordinates, sources shapes (semi-minor, semi-major of ellipse and orientation angle), source redshifts
 * 	
 *  the images file must contain
 	 	x_center	y_center	major axis	minor axis	ellipticity angle	redshift
 	1	   .		   .		   .		    .			.		    .
 	2	   .		   .		   .		    .			.		    .
 	3	   .		   .		   .		    .			.		    .
    
 * 
 * @param runmode runmode parameter
 * @param source	array where sources will be stored
 */
 
 
 /// read the positions, redshifts and numbers of multiple images from the images file
 void module_readParameters_readSources(struct runmode_param *runmode, struct galaxy source[])
 {
 
 	std::string second, line1;
 	int j=0;
 	//cast variables
 	double cast_x,cast_y,cast_a,cast_b,cast_theta,cast_z,cast_mag;
 	
 	//Calculating nset
 	std::ifstream IM(runmode->sourfile.c_str(),std::ios::in);
 	if ( IM ){
 		int i = 0;
         while( std::getline(IM,line1) ){    // Read until we reach the end
             i++;
 		}
 		runmode->nsets = i ;
 	}
 	else{
 			std::cout << "Error : file "  << runmode->sourfile <<  " not found" << std::endl;
 			exit(-1);
 		}
 		
 	IM.close();
 
 /*********initialisation of nimages array*********/
 for(int i=0; i<runmode->nsets; i++){
 	
 	/* Initialise here the variables of the images*/
 	source[i].center.x = source[i].center.y = 0;
 	source[i].shape.a = source[i].shape.b = source[i].shape.theta = 0;
 	source[i].redshift = 0; 
 	source[i].mag = 0; 
 
     }
 
 	std::ifstream IM2(runmode->sourfile.c_str(),std::ios::in);
 // Read in images
 	if ( IM2 )
     	{
 			int i = 0;
         	while( std::getline(IM2,line1) )    // Read until we reach the end
         	{
                         // Read in the parameters, * means we read in a parameter but do not store it
             
 			sscanf(line1.c_str(), "%d %lf %lf %lf %lf %lf %lf %lf", &j, &cast_x, &cast_y, &cast_a, &cast_b, &cast_theta, &cast_z, &cast_mag);
 			//Casting
 			source[i].center.x =(type_t)cast_x;
 			source[i].center.y =(type_t)cast_y;
 			source[i].shape.a =(type_t)cast_a;
 			source[i].shape.b =(type_t)cast_b;
 			source[i].shape.theta =(type_t)cast_theta;
 			source[i].redshift =(type_t)cast_z;
 			source[i].mag =(type_t)cast_mag;
 			
             i++;
 		}
 	}
 	else{
 			std::cout << "Error : file "  << runmode->sourfile <<  " not found" << std::endl;
 			exit(-1);
 		}
 	
 	IM2.close();
 }
 
 /** @brief This module function reads the critical and caustic line information
  *@param infile path to file
 * @param cline cline parameter variable
 */
 
 
 
 #if 0
 void module_readParameters_CriticCaustic(std::string infile, cline_param *cline){
 	
     std::string first, second, third, line1, line2;
     //cast variables
     double cast_1;
     //Default value initialiasation
     cline->nplan = 0;
     for(int i =0; i < NPZMAX; ++i){
 		cline->cz[i] = 0;
 	    cline->dos[i] = 0;
 	    cline->dls[i] = 0;
 	    cline->dlsds[i] = 0;
 	}
 
     cline->limitLow = 1;
     cline->dmax = 1;
     cline->limitHigh = 10;
     cline->nbgridcells = 1000;
 
 	std::ifstream IN(infile.c_str(), std::ios::in);
     if ( IN ){
 		while(std::getline(IN,line1)){
 			std::istringstream read1(line1); // create a stream for the line
 			read1 >> first;
 			if ( strncmp(first.c_str(), "cline", 5) == 0){    // Read in runmode information
 	            std::getline(IN,line2);
 				std::istringstream read2(line2);
 				read2 >> second >> third;    // Read in 4 words
 	    		while (strncmp(second.c_str(), "end", 3))    // Read until we reach end
 	    		{	
 	        		if ( !strcmp(second.c_str(), "nplan") ){
 						sscanf(third.c_str(), "%d", &cline->nplan);
 				            if ( cline->nplan > NPZMAX ){
 				                cline->nplan = NPZMAX;
 							}
 							int j = 0;
 				            while ( read2 >> third )
 				            {
 				                sscanf(third.c_str(), "%lf", &cast_1);
 				                cline->cz[j] =(type_t)cast_1;
 				                //printf(" zf %f \n",cline->cz[j]);
 				                j++;
 				            }
 						}
 	            		
 					if ( !strcmp(second.c_str(), "dmax") )
 	        		{
 	            		sscanf(third.c_str(), "%lf", &cast_1);
 	            		cline->dmax =(type_t)cast_1;
 	            		cline->xmax = cline->dmax;
 	            		cline->xmin = -cline->dmax;
 	            		cline->ymax = cline->dmax;
 	            		cline->ymin = -cline->dmax;	            		
 	        		}
 	        		if ( !strcmp(second.c_str(), "pas") || !strcmp(second.c_str(), "step") || !strcmp(second.c_str(), "limitLow") )
 	        		{
 						sscanf(third.c_str(), "%lf", &cast_1);
 						cline->limitLow =(type_t)cast_1;
 	        		}
 	        		if ( !strcmp(second.c_str(), "limitHigh") )
 	        		{
 						sscanf(third.c_str(), "%lf", &cast_1);
 						cline->limitHigh =(type_t)cast_1;
 	        		}
 	        		if ( !strcmp(second.c_str(), "nbgridcells") )
 	        		{
 						sscanf(third.c_str(), "%d", &cline->nbgridcells);
 	        		}
 					// Read the next line
 					std::getline(IN,line2);
 					std::istringstream read2(line2);
 					read2 >> second >> third;
 	    		}
 		    }  
 		} // closes while loop    
 
 }  // closes if(IN)
 
 
 
 IN.close();
 	
 }
 #endif
 
 /** @brief This module function reads the potfile information
  *@param infile path to file
 * @param potfile_param potfile parameter variable
 */
 
 
 
 void module_readParameters_readpotfiles_param(std::string infile, potfile_param *potfile, cosmo_param cosmology){
 
     std::string first, second, third, line1, line2;
     //cast variables
     double cast_1, cast_2;
     //Default value potfile initialiasation
 
     potfile->potid = 0;
     potfile->ftype = 0;
     potfile->type = 0;
     potfile->zlens = 0;
     potfile->mag0 = 0;
     potfile->isigma = 0;
     potfile->sigma = -1;
     potfile->core = -1.0;
     potfile->corekpc = -1;
     potfile->ircut = 0;
     potfile->cut1 = DBL_MAX;
     potfile->cut2 = 0;
     potfile->cutkpc1 = DBL_MAX;
     potfile->cutkpc2 = 0;
     potfile->islope = 0;
     potfile->slope1 = 0;
     potfile->slope2 = 0;
     potfile->npotfile = 0;
     potfile->ivdscat = 0;
 	potfile->vdscat1 =0;
 	potfile->vdscat2 = 0;
     potfile->ircutscat = 0;
 
 	std::ifstream IN(infile.c_str(), std::ios::in);
     if ( IN ){
 		while(std::getline(IN,line1)){
 			std::istringstream read1(line1); // create a stream for the line
 			read1 >> first;
 			if ( strncmp(first.c_str(), "potfile", 7) == 0){    // Read in potfile information
 	            std::getline(IN,line2);
 				std::istringstream read2(line2);
 				read2 >> second >> third;    // Read in 4 words
 	    		while (strncmp(second.c_str(), "end", 3))    // Read until we reach end
 	    		{
 
 					if ( !strcmp(second.c_str(), "filein") )
 	        		{
 						sscanf(line2.c_str(), " %*s %d %s ", &potfile->ftype, potfile->potfile);
 	        		}
 					else if ( !strcmp(second.c_str(), "type") )
 	        		{
 						sscanf(third.c_str(), "%d", &potfile->type);
 	        		}
 					else if ( !strcmp(second.c_str(), "zlens") || !strcmp(second.c_str(), "z_lens"))
 	        		{
 						sscanf(third.c_str(), "%lf", &cast_1);
 						potfile->zlens =(type_t)cast_1;
 	        		}
 
 	        		else if (!strcmp(second.c_str(), "mag0") || !strcmp(second.c_str(), "r200"))
 					{
 						sscanf(third.c_str(), "%lf", &cast_1);
 						potfile->mag0 =(type_t)cast_1;
 					}
 					else if (!strcmp(second.c_str(), "select"))
 					{
 						sscanf(third.c_str(), "%d", &potfile->select);
 					}
 					else if (!strcmp(second.c_str(), "core"))
 					{
 						sscanf(third.c_str(), "%lf", &cast_1);
 						potfile->core =(type_t)cast_1;
 					}
 					else if (!strcmp(second.c_str(), "corekpc"))
 					{
 						sscanf(third.c_str(), "%lf", &cast_1);
 						potfile->corekpc =(type_t)cast_1;
 					}
 					else if (!strcmp(second.c_str(), "cut"))
 					{
 						sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ircut, &cast_1, &cast_2);
 						potfile->cut1 =(type_t)cast_1;
 						potfile->cut2 =(type_t)cast_2;
 					}
 					else if (!strcmp(second.c_str(), "cutkpc"))
 					{
 						sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ircut, &cast_1, &cast_2);
 						potfile->cutkpc1 =(type_t)cast_1;
 						potfile->cutkpc2 =(type_t)cast_2;
 					}
 					else if (!strcmp(second.c_str(), "slope") || !strcmp(second.c_str(), "m200slope"))
 					{
 						sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->islope, &cast_1, &cast_2);
 						potfile->slope1 =(type_t)cast_1;
 						potfile->slope2 =(type_t)cast_2;
 					}
 					else if (!strcmp(second.c_str(), "sigma"))
 					{
 						sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->isigma, &cast_1, &cast_2);
 						potfile->sigma1 =(type_t)cast_1;
 						potfile->sigma2 =(type_t)cast_2;
 					}
 					else if (!strcmp(second.c_str(), "vdslope") || !strcmp(second.c_str(), "c200slope"))
 					{
 						sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ivdslope, &cast_1, &cast_2);
 						potfile->vdslope1 =(type_t)cast_1;
 						potfile->vdslope2 =(type_t)cast_2;
 					}
 					else if (!strncmp(second.c_str(), "vdscat", 6))
 					{
 						sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ivdscat, &cast_1, &cast_2);
 						potfile->vdscat1 =(type_t)cast_1;
 						potfile->vdscat2 =(type_t)cast_2;
 					}
 					else if (!strncmp(second.c_str(), "rcutscat", 8))
 					{
 						sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ircutscat, &cast_1, &cast_2);
 						potfile->rcutscat1 =(type_t)cast_1;
 						potfile->rcutscat2 =(type_t)cast_2;
 					}
 					else if (!strcmp(second.c_str(), "a") || !strcmp(second.c_str(), "m200"))
 					{
 						sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ia, &cast_1, &cast_2);
 						potfile->a1 =(type_t)cast_1;
 						potfile->a2 =(type_t)cast_2;
 					}
 					else if (!strcmp(second.c_str(), "b") || !strcmp(second.c_str(), "c200"))
 					{
 						sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ib, &cast_1, &cast_2);
 						potfile->b1 =(type_t)cast_1;
 						potfile->b2 =(type_t)cast_2;
 					}
 
 					// Read the next line
 					std::getline(IN,line2);
 					std::istringstream read2(line2);
 					read2 >> second >> third;
 	    		}
 		    }
 		} // closes while loop
 
 }  // closes if(IN)
 
 IN.close();
 
 //Calculate vdisp, rcut and rcore
 
 //*********************************************************************
 // Set the Potfile current and limiting values
 //*********************************************************************
 if ( potfile->ftype <= 4 )
 {
     // Scale potfile SIGMA
 	potfile->sigma = potfile->sigma1;
 
     // ... and potfile RCUT
     if ( potfile->cut1 == DBL_MAX && potfile->cutkpc1 != DBL_MAX )
     {
     	potfile->cut1 = potfile->cutkpc1 / (d0 / cosmology.h * module_cosmodistances_observerObject(potfile->zlens,cosmology));
     	potfile->cut2 = potfile->cutkpc2 / (d0 / cosmology.h * module_cosmodistances_observerObject(potfile->zlens,cosmology));
     }
     potfile->cut = potfile->cut1;
 
     // ... and potfile RCORE
     if ( potfile->core == -1.0 && potfile->corekpc != -1 )
     	potfile->core = potfile->corekpc / (d0 / cosmology.h * module_cosmodistances_observerObject(potfile->zlens,cosmology));
 
     // ... and potfile RCUT SLOPE
     potfile->slope = potfile->slope1;
 
     // ... and potfile VDSLOPE
     potfile->vdslope = potfile->vdslope1;
 }
 
 
 
 
 }
 
 /** @brief This module function loads the potential from the potfile into a lens
  *@param infile path to file
 * @param cline cline parameter variable
 */
 
 
 /*
 void module_readParameters_readpotfiles(const runmode_param *runmode, potfile_param *potfile, Potential *lens){
 
 	std::string first, second, line1;
 	double cast_1, cast_2;
 	double aa,bb;
 	double DTR=acos(-1.)/180.;
 	//cast variables
 	double cast_x,cast_y,cast_theta,cast_lum,cast_mag;
 	potfile->reference_ra = potfile->reference_dec = 0;
 	std::cerr << "Ref pot: "<< potfile->reference_ra <<  " " << potfile->reference_dec << std::endl;
 
 // Read in potentials
     std::ifstream IM(potfile->potfile,std::ios::in);
 	if ( IM )
     	{
 			int i = runmode->nhalos;
         	while( std::getline(IM,line1) )    // Read until we reach the end
         	{
     			std::istringstream read1(line1); // create a stream for the line
     			read1 >> first;
                 // Skip commented lines with #
     			//std::cerr << first << std::endl;
                 if (!strncmp(first.c_str(), "#REFERENCE", 10) ){
                 	sscanf(line1.c_str(), " %*s %*d%lf%lf",  &cast_1, &cast_2);
                 	potfile->reference_ra = (type_t) cast_1;
                 	potfile->reference_dec = (type_t) cast_2;
                 	std::cerr << "Ref potfiles: "<< potfile->reference_ra <<  " " << potfile->reference_dec << std::endl;
                     continue;
 
                 }
                 else if ( line1[0] == '#' ){
                 	 continue;
                 }
 
 
                 // Default initialisation of clump
                 lens[i].type = potfile->type;
                 lens[i].z = potfile->zlens;
                 lens[i].ellipticity_potential = lens[i].ellipticity = 0.;
                 lens[i].alpha  = 0.;
                 lens[i].rcut = 0.;
                 lens[i].rcore = 0.;
                 lens[i].rscale = 0.;
                 lens[i].mag = 0.;
                 lens[i].lum = 0.;
                 lens[i].vdisp = 0.;
                 lens[i].position.x = lens[i].position.y = 0.;
                 lens[i].ellipticity_angle = 0.;
                 lens[i].weight = 0;
                 lens[i].exponent = 0;
                 lens[i].einasto_kappacritic = 0;
 
 
 				// Read a line of the catalog
 				if ( potfile->ftype == 1 || potfile->ftype == 3 )
 				{
 						sscanf( line1.c_str(), "%s%lf%lf%lf%lf%lf%lf%lf",
 							&lens[i].name, &cast_x, &cast_y,
 								 &aa, &bb, &cast_theta, &cast_mag, &cast_lum);
 						lens[i].ellipticity = (type_t) (aa*aa-bb*bb)/(aa*aa+bb*bb);
 						if ( lens[i].ellipticity < 0 )
 						{
 							fprintf( stderr, "ERROR: The potfile clump %s has a negative ellipticity.\n", lens[i].name );
 							exit(-1);
 						}
 						//goto NEXT;
 
 				}
 
 
 				//Casting
 				lens[i].position.x =(type_t)cast_x;
 				lens[i].position.y =(type_t)cast_y;
 				lens[i].theta =(type_t)cast_theta;
 				lens[i].lum =(type_t)cast_lum;
 				lens[i].mag =(type_t)cast_mag;
 				//general parameters
 				lens[i].vdisp = potfile->sigma;
 				lens[i].rcore = potfile->core;
 				lens[i].rcut = potfile->cut;
 				lens[i].ellipticity_angle = lens[i].theta* DTR;
 
 			    //Calculate parameters like b0, potential ellipticity and anyother parameter depending on the profile
 			    module_readParameters_calculatePotentialparameter(&lens[i]);
 
 				//Variables
 				potfile->npotfile++;
 				i++;
 		}
 	}
 	else{
 			std::cout << "Error : file "  << potfile->potfile <<  " not found" << std::endl;
 			exit(-1);
 		}
 
 	IM.close();
 
 }
 */
 void module_readParameters_readpotfiles_SOA(const runmode_param *runmode, const cosmo_param *cosmology, potfile_param *potfile, Potential_SOA *lens){
 
 	std::string first, second, line1;
 	double cast_1, cast_2;
 	double aa,bb;
 	double DTR=acos(-1.)/180.;	/* 1 deg in rad  = pi/180 */
 	//cast variables
 	double cast_x,cast_y,cast_theta,cast_lum,cast_mag, cast_name;
 
 
 // Read in potentials
     std::ifstream IM(potfile->potfile,std::ios::in);
 	if ( IM )
     	{
 			int i = runmode->nhalos;
         	while( std::getline(IM,line1) )    // Read until we reach the end
         	{
     			std::istringstream read1(line1); // create a stream for the line
     			read1 >> first;
                 // Skip commented lines with #
-    			std::cerr << first << std::endl;
+    			//std::cerr << first << std::endl;
                  if (!strncmp(first.c_str(), "#REFERENCE", 10)){
                 	sscanf(line1.c_str(), " %*s %d%lf%lf", &potfile->reference_mode,  &cast_1, &cast_2);
                 	potfile->reference_ra = (type_t) cast_1;
                 	potfile->reference_dec = (type_t) cast_2;
                 	std::cerr << "Ref pot: "<< potfile->reference_ra << " " << potfile->reference_dec << std::endl;
                     continue;
 
 
                 }
                 // Skip commented lines with #
                 else if ( line1[0] == '#' )
                     continue;
 
                 //std::cerr << "Turn " << i << std::endl;
 
                 // Default initialisation of clump
                 lens->type[i] = potfile->type;
                 lens->z[i] = potfile->zlens;
                 lens->ellipticity_potential[i] = lens->ellipticity[i] = 0.;
                 lens->alpha[i]  = 0.;
                 lens->rcut[i] = 0.;
                 lens->rcore[i] = 0.;
                 lens->rscale[i] = 0.;
                 lens->mag[i] = 0.;
                 lens->lum[i] = 0.;
                 lens->vdisp[i] = 0.;
                 lens->position_x[i] = lens->position_y[i] = 0.;
                 lens->ellipticity_angle[i] = 0.;
                 lens->weight[i] = 0;
                 lens->exponent[i] = 0;
                 lens->einasto_kappacritic[i] = 0;
 
                 //std::cerr << "Init finished "<< std::endl;
 
 
 				// Read a line of the catalog
 				if ( potfile->ftype == 1 || potfile->ftype == 3 )
 				{
 						sscanf( line1.c_str(), "%lf%lf%lf%lf%lf%lf%lf%lf",
 							&cast_name, &cast_x, &cast_y,
 								 &aa, &bb, &cast_theta, &cast_mag, &cast_lum);
 						lens->ellipticity[i] = (type_t) (aa*aa-bb*bb)/(aa*aa+bb*bb);
 						if ( lens->ellipticity[i] < 0 )
 						{
 							fprintf( stderr, "ERROR: The potfile clump %d has a negative ellipticity.\n", i );
 							exit(-1);
 						}
 						//goto NEXT;
 
 				}
 
 
 				//Casting
 				lens->name[i] =(type_t)cast_name;
 				lens->position_x[i] =(type_t)cast_x;
 				lens->position_y[i] =(type_t)cast_y;
 				//module_cosmodistances_relativecoordinates_XY( &lens->position_x[i], &lens->position_y[i], potfile->reference_mode, potfile->reference_ra, potfile->reference_dec );
 				lens->theta[i] =(type_t)cast_theta;
 				lens->lum[i] =(type_t)cast_lum;
 				lens->mag[i] =(type_t)cast_mag;
 				//general parameters
 				//lens->vdisp[i] = potfile->sigma;
 				//lens->rcore[i] = potfile->core;
 				//lens->rcut[i] = potfile->cut;
 				lens->ellipticity_angle[i] = lens->theta[i]* DTR;
 				lens->anglecos[i]	     = cos(lens->ellipticity_angle[i]);
 				lens->anglesin[i] 	     = sin(lens->ellipticity_angle[i]);
 
 
 
 				//Variables
 				potfile->npotfile++;
 				i++;
 		}
 
         	setScalingRelations(runmode,cosmology,potfile,lens);
 
 
 
 
 
 
 
 	}
 	else{
 			std::cout << "Error : file "  << potfile->potfile <<  " not found" << std::endl;
 			exit(-1);
 		}
 
 	IM.close();
 
 }
 
 void setScalingRelations(const runmode_param *runmode, const cosmo_param *cosmology, potfile_param *pot, Potential_SOA* lenses){
 
     //*********************************************************************
     // Check if the scaling relations are defined
     //*********************************************************************
     if ( pot->ftype == 1 )
     {
         if ( pot->sigma1 == -1 )
         {
             fprintf(stderr, "ERROR: potfile: sigma not defined\n");
             exit(-1);
         }
 
         if ( pot->cutkpc1 == DBL_MAX && pot->cut1 == DBL_MAX )
         {
             fprintf(stderr, "ERROR: potfile: cut length not defined\n");
             exit(-1);
         }
 
         if ( pot->corekpc == -1 && pot->core == -1 )
         {
             fprintf(stderr, "ERROR: potfile: core length not defined\n");
             exit(-1);
         }
     }
     else{
     	fprintf(stderr, "ERROR: potfile: potfile type %d not supported\n", pot->ftype);
     	            exit(-1);
     }
 
     //*********************************************************************
     // Set the Potfile current and limiting values
     //*********************************************************************
     if ( pot->ftype <= 4 )
     {
         // Scale potfile SIGMA
         pot->sigma = pot->sigma1;
 
         // ... and potfile RCUT
         if ( pot->cut1 == DBL_MAX && pot->cutkpc1 != DBL_MAX )
         {
             pot->cut1 = pot->cutkpc1 / (d0 / cosmology->h * module_cosmodistances_observerObject(pot->zlens,*cosmology));
             pot->cut2 = pot->cutkpc2 / (d0 / cosmology->h * module_cosmodistances_observerObject(pot->zlens,*cosmology));
         }
         pot->cut = pot->cut1;
 
         // ... and potfile RCORE
         if ( pot->core == -1.0 && pot->corekpc != -1 )
             pot->core = pot->corekpc / (d0 / cosmology->h * module_cosmodistances_observerObject(pot->zlens,*cosmology));
 
         // ... and potfile RCUT SLOPE
         pot->slope = pot->slope1;
 
         // ... and potfile VDSLOPE
         pot->vdslope = pot->vdslope1;
     }
 
     // set potfile VDSCAT for all potfile scaling relations
      pot->vdscat = pot->vdscat1;
 
      // ... and potfile RCUTSCAT
      pot->rcutscat = pot->rcutscat1;
 
 
      for ( int i = runmode->nhalos ; i < runmode->npotfile+runmode->nhalos ; i++ ){
          if ( lenses->mag[i] != 0 ){
         	 lenses->rcore[i] = pot->core * pow(10., 0.4 * (pot->mag0 - lenses->mag[i]) / 2.);}
 		 /*
 		  * Scale the sigma and rcut of a potfile clump according to the potfile parameters
 		  */
          // loop over the potfile clumps to scale
          if ( pot->ftype <= 4 )
          {
 
 
                  if ( lenses->mag[i] != 0 )
                  {
                 	 lenses->vdisp[i] = pot->sigma *
                                     pow(10., 0.4 * (pot->mag0 - lenses->mag[i]) / pot->vdslope);
                      /* The factor of 2 so that with slope1 = 4, we have
                       * 2/slope1=1/2, then Brainerd, Blandford, Smail, 1996 */
                 	 lenses->rcut[i] = pot->cut *
                                    pow(10., 0.4 * (pot->mag0 - lenses->mag[i]) * 2. / pot->slope);
                  }
 
                  if ( pot->ivdscat != 0 ){
                 	 fprintf(stderr, "ERROR: potfile: ivdscat not supported yet\n");
                 	 exit(-1);
                  }
 
                  // Convert sigma to b0
                  //set_dynamics(i);
 
                  if ( pot->ircutscat != 0 ){
                 	 fprintf(stderr, "ERROR: potfile: ircutscat not supported yet\n");
                 	 exit(-1);
                  }
              }
          //Calculate parameters like b0, potential ellipticity and anyother parameter depending on the profile
 		 module_readParameters_calculatePotentialparameter_SOA(lenses,i);
          }
 
 
 
 }
 
 
 
 /** @brief read the information about arclets
 * !Not used! Will be reworked
 * This module function reads in the arclet images for weak lensing
 * @param Arclet file
 */
 
 
 void module_readParameters_arclets(std::string arclets_filename, point arclets_position[], ellipse arclets_shape[], double arclets_redshift[])
 {
 	std::string second, line1;
 	int j=0;
 	//cast variables
 	double cast_x,cast_y,cast_a,cast_b,cast_theta,cast_z;
 
 
 /******************** read the arclets file *******************/
 /* the arclets file must contain
 			id	x_center y_center   a   b   theta   redshift
 line :	1          	 .	   .        .       .   .     .        .        
 	2
 	3
 	.
 	.
 	narclets
 */
 
     std::ifstream IM(arclets_filename.c_str(),std::ios::in);
 	if ( IM )
     	{
         	while( std::getline(IM,line1))    // Read every line
         	{       // Read in parameters, * means we read the parameter but don't store it (*s)   
 			sscanf(line1.c_str(), "%*s %lf %lf %lf %lf %lf %lf", &cast_x, &cast_y, &cast_a, &cast_b, &cast_theta, &cast_z);
 			//Casting
 			arclets_position[j].x =(type_t)cast_x;
 			arclets_position[j].y =(type_t)cast_y;
 			arclets_shape[j].a =(type_t)cast_a;
 			arclets_shape[j].b =(type_t)cast_b;
 			arclets_shape[j].theta =(type_t)cast_theta;
 			arclets_redshift[j] =(type_t)cast_z;
 
 			j++;
 		}
 		IM.close();
 	}
 	else
 	{
 		printf("Error : file %s not found\n",arclets_filename.c_str());
 		exit(-1);
 	}
 }
 
 
 /** @brief This module function reads in if a parameter will be optimized by the MCMC or stay fixed. 
  * 
  * This module function reads in if a parameter will be optimized by the MCMC or stay fixed. 
  * If it will be optimized, it specifies its minimum and maximum allowed values. Unless declared otherwise by the user, input values are fixed and won't be optimized.
 * 
 * read an infile :
 	|  .
 	|  .
 	|limit
 	|	x_center x  x  x  x  <--- these values contains : block  min  max  accuracy
 	|	y_center x  x  x  x		if block=1 this is a free parameter, otherwise not
 	|	   .	 .  .  .  .		min and max define the extremal value of this parameter
 	|	   .	 .  .  .  .		accuracy is a criterium of convergence for the MCMC
 	|	  end
 	|  .
 	|  .
 	|limit
 	|	x_center x  x  x  x
 	|	y_center x  x  x  x
 	|	   .	 .  .  .  .
 	|	   .	 .  .  .  .
 	|	  end
 	|  .
 	|  .
 	|finish
 and fills the variables with these values
 * @param infile path to input file
 * @param host_potentialoptimization array where limits will be stored
 * @param nhalos number of mass distributions
 
 */
 
 void module_readParameters_limit(std::string infile, struct potentialoptimization host_potentialoptimization[], int nhalos )
 {
     std::string first, second, line1, line2;
     int i=0;
 	//cast variables
 	double cast_min,cast_max,cast_sigma;
 	//double d1 = d0 / cosmology.h * module_cosmodistances_observerObject(lens_temp.z,cosmology);
 
  type_t DTR=acos(-1.)/180.;	/* 1 deg in rad  = pi/180 */
 
 /*** initialize the block variables to zero (= not to be optimized) ***/
 for(int index=0; index<nhalos; index++)
 {
 	host_potentialoptimization[index].position.x.block=0;
 	host_potentialoptimization[index].position.y.block=0;
 	host_potentialoptimization[index].vdisp.block = 0;
 	host_potentialoptimization[index].weight.block=0;
 	host_potentialoptimization[index].ellipticity_angle.block=0;
 	host_potentialoptimization[index].ellipticity.block=0;
 	host_potentialoptimization[index].rcore.block=0;
 	host_potentialoptimization[index].rcut.block=0;
 	host_potentialoptimization[index].rscale.block=0;
 	host_potentialoptimization[index].exponent.block=0;
 	host_potentialoptimization[index].alpha.block=0;
 	host_potentialoptimization[index].einasto_kappacritic.block=0;
 	host_potentialoptimization[index].z.block=0;
 }
 
 
 
 
 // Read in 
 std::ifstream IN(infile.c_str(), std::ios::in);
     if ( IN )
     {
 	while(std::getline(IN,line1))
 	{   
 	    	std::istringstream read1(line1);
 	    	read1 >> first;
 
 
 		if (!strncmp(first.c_str(), "limit", 5))  // Read the limits
 		{
                 while(std::getline(IN,line2))
                 {
                     std::istringstream read2(line2);
                     read2 >> second;    // Read in 1 word
                     if (strcmp(second.c_str(), "end") == 0) break;  // stop reading at "end" and move to next potential limit section
                 
 
                     if (!strcmp(second.c_str(), "x_centre") ||    // Read in for x center
 	                !strcmp(second.c_str(), "x_center") )
                     {
                         sscanf(line2.c_str(), "%*s %d %lf %lf %lf", &host_potentialoptimization[i].position.x.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].position.x.min =(type_t)cast_min;
                         host_potentialoptimization[i].position.x.max =(type_t)cast_max;
                         host_potentialoptimization[i].position.x.sigma =(type_t)cast_sigma;
                     }
                     else if (!strcmp(second.c_str(), "y_centre") ||  // Read in for y center
             		 !strcmp(second.c_str(), "y_center") )
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].position.y.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].position.y.min =(type_t)cast_min;
                         host_potentialoptimization[i].position.y.max =(type_t)cast_max;
                         host_potentialoptimization[i].position.y.sigma =(type_t)cast_sigma;
                     }
                     else if ( !strcmp(second.c_str(), "v_disp") )  // Read in for ellipticity
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].vdisp.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].vdisp.min =(type_t)cast_min;
                         host_potentialoptimization[i].vdisp.max =(type_t)cast_max;
                         host_potentialoptimization[i].vdisp.sigma =(type_t)cast_sigma;
                     }
                     else if ( !strcmp(second.c_str(), "ellip_pot") )  // Read in for ellipticity
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].ellipticity_potential.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].ellipticity_potential.min =(type_t)cast_min;
                         host_potentialoptimization[i].ellipticity_potential.max =(type_t)cast_max;
                         host_potentialoptimization[i].ellipticity_potential.sigma =(type_t)cast_sigma;
                     }
                     else if ( !strcmp(second.c_str(), "ellipticitymass") || !strcmp(second.c_str(), "ellipticity") || !strcmp(second.c_str(), "ellipticite") )  // Read in for ellipticity
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].ellipticity.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].ellipticity.min =(type_t)cast_min;
                         host_potentialoptimization[i].ellipticity.max =(type_t)cast_max;
                         host_potentialoptimization[i].ellipticity.sigma =(type_t)cast_sigma;
                     }
                     else if (!strcmp(second.c_str(), "ellipticity_angle") || !strcmp(second.c_str(), "angle_pos"))  // Read in for ellipticity angle
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].ellipticity_angle.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].ellipticity_angle.min =(type_t)cast_min;
                         host_potentialoptimization[i].ellipticity_angle.max =(type_t)cast_max;
                         host_potentialoptimization[i].ellipticity_angle.sigma =(type_t)cast_sigma;
 
                         host_potentialoptimization[i].ellipticity_angle.min *= DTR;
                         host_potentialoptimization[i].ellipticity_angle.max *= DTR;
                         host_potentialoptimization[i].ellipticity_angle.sigma *= DTR;
                     }
                     else if ( !strcmp(second.c_str(), "rcut") || !strcmp(second.c_str(), "cut_radius"))    // Read in for r cut
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].rcut.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].rcut.min =(type_t)cast_min;
                         host_potentialoptimization[i].rcut.max =(type_t)cast_max;
                         host_potentialoptimization[i].rcut.sigma =(type_t)cast_sigma;
                     }
                     else if (  !strcmp(second.c_str(), "cut_radius_kpc"))    // Read in for r cut
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].rcut.block,
                         		&cast_min, &cast_max, &cast_sigma);
                     }
                     else if ( !strcmp(second.c_str(), "rcore") || !strcmp(second.c_str(), "core_radius"))  // Read in for core radius
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].rcore.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].rcore.min =(type_t)cast_min;
                         host_potentialoptimization[i].rcore.max =(type_t)cast_max;
                         host_potentialoptimization[i].rcore.sigma =(type_t)cast_sigma;
                     }
                     else if ( !strcmp(second.c_str(), "NFW_rs") ||    // Read in for NFW scale radius
                               !strcmp(second.c_str(), "rscale") )
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].rscale.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].rscale.min =(type_t)cast_min;
                         host_potentialoptimization[i].rscale.max =(type_t)cast_max;
                         host_potentialoptimization[i].rscale.sigma =(type_t)cast_sigma;
                     }
             	else if ( !strcmp(second.c_str(), "exponent") )    // Read in for exponent
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].exponent.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].exponent.min =(type_t)cast_min;
                         host_potentialoptimization[i].exponent.max =(type_t)cast_max;
                         host_potentialoptimization[i].exponent.sigma =(type_t)cast_sigma;
                     }
             	else if ( !strcmp(second.c_str(), "alpha") )    // Read in for alpha
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].alpha.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].alpha.min =(type_t)cast_min;
                         host_potentialoptimization[i].alpha.max =(type_t)cast_max;
                         host_potentialoptimization[i].alpha.sigma =(type_t)cast_sigma;
                     }
             	else if ( !strcmp(second.c_str(), "einasto_kappacritic") )  // Read in for critical kappa
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].einasto_kappacritic.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].einasto_kappacritic.min =(type_t)cast_min;
                         host_potentialoptimization[i].einasto_kappacritic.max =(type_t)cast_max;
                         host_potentialoptimization[i].einasto_kappacritic.sigma =(type_t)cast_sigma;
                     }
                     else if (!strcmp(second.c_str(), "virial_mass") ||  // Read in for virial mass
                              !strcmp(second.c_str(), "masse") ||
                              !strcmp(second.c_str(), "m200") ||
                              !strcmp(second.c_str(), "mass"))
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].weight.block,
                         		&cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].weight.min =(type_t)cast_min;
                         host_potentialoptimization[i].weight.max =(type_t)cast_max;
                         host_potentialoptimization[i].weight.sigma =(type_t)cast_sigma;
                     }
                     else if ( !strcmp(second.c_str(), "z_lens") )    // Read in for redshift
                     {
                         sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].z.block,
                                &cast_min, &cast_max, &cast_sigma);
                         host_potentialoptimization[i].z.min =(type_t)cast_min;
                         host_potentialoptimization[i].z.max =(type_t)cast_max;
                         host_potentialoptimization[i].z.sigma =(type_t)cast_sigma;
                     }
                 }  // end of inner while loop
                 i++; // Move to next potential
     		}
 	}
     }
 IN.close();
 }
 
 
 /** @brief This module function reads in the potential form and its parameters (e.g. NFW).
 * 
 * @param infile path to input file
 * @param lens array where mass distributions will be stored
 * @param nhalos number of mass distributions
 */
 
 
 void module_readParameters_Potential(std::string infile, Potential lens[], int nhalos)
 {
 
 	double DTR=acos(-1.)/180.;	/* 1 deg in rad  = pi/180 */
     Potential  *ilens;
     std::string first, second, third, line1, line2;
     int i=0;
 
 std::ifstream IN(infile.c_str(), std::ios::in);
     if ( IN )
     {
 	while(std::getline(IN,line1))
         {
 	    std::istringstream read1(line1); // create a stream for the line
 	    read1 >> first;
             if (!strncmp(first.c_str(), "potent", 6))  // Read in potential
 		{
 /***********************************************/
 
     ilens = &lens[i];
 
     ilens->position.x  		 = ilens->position.y = 0.;
     ilens->ellipticity 		 = 0;
     ilens->ellipticity_potential = 0.;
     ilens->ellipticity_angle 	 = 0.;
     ilens->rcut 		 = 0.;
     ilens->rcore 		 = 0;
     ilens->weight 		 = 0;
     ilens->rscale 		 = 0;
     ilens->exponent 		 = 0;
     ilens->alpha 		 = 0.;
     ilens->einasto_kappacritic   = 0;
     ilens->z 			 = 0;
 
 
     while(std::getline(IN,line2))
     {
     	std::istringstream read2(line2);
     	read2 >> second >> third;
     	if (strcmp(second.c_str(), "end") == 0)  // Move to next potential and initialize it
         {
    	    if ( ilens->z == 0. )  // Check if redshift from current halo was initialized
             {
                 fprintf(stderr, "ERROR: No redshift defined for potential %d\n", i);
                 exit(-1);
             }
       
             break; // Break while loop and move to next potential
 
         }
     
  	// Read in values
  	
         if ( !strcmp(second.c_str(), "profil") ||  // Get profile
              !strcmp(second.c_str(), "profile") )
         {
 		if(!strcmp(third.c_str(), "PIEMD") ||
 		   !strcmp(third.c_str(), "1") )
 		{
 			ilens->type=1;
 			strcpy(ilens->type_name,"PIEMD");//ilens->type_name="PIEMD";
 		}
 		if(!strcmp(third.c_str(), "NFW") ||
 		   !strcmp(third.c_str(), "2") )
 		{
 			ilens->type=2;
 			strcpy(ilens->type_name,"NFW");//ilens->type_name="NFW";
 		}
 		if(!strcmp(third.c_str(), "SIES") ||
 		   !strcmp(third.c_str(), "3") )
 		{
 			ilens->type=3;
 			strcpy(ilens->type_name,"SIES");//ilens->type_name="SIES";
 		}
 		if(!strncmp(third.c_str(), "point", 5) ||
 		   !strcmp(third.c_str(), "4") )
 		{
 			ilens->type=4;
 			strcpy(ilens->type_name,"point");//ilens->type_name="point";
 		}
 		if(!strcmp(third.c_str(), "SIE") ||
 		   !strcmp(third.c_str(), "5") )
 		{
 			ilens->type=5;
 			strcpy(ilens->type_name,"SIE");//ilens->type_name="point";
 		}
 		if(!strcmp(third.c_str(), "8") )
 		{
 			ilens->type=8;
 			strcpy(ilens->type_name,"PIEMD1");//ilens->type_name="point";
 		}
 		if(!strcmp(third.c_str(), "81") )
 		{
 			ilens->type=81;
 			strcpy(ilens->type_name,"PIEMD81");//ilens->type_name="point";
 		}
 		
         }
         
         else if (!strcmp(second.c_str(), "name"))    // Get name of lens
         {
             sscanf(third.c_str(),"%s",ilens->name);
         }
         
         else if (!strcmp(second.c_str(), "x_centre") ||  // Get x center
 		 !strcmp(second.c_str(), "x_center") )
         {
             ilens->position.x=atof(third.c_str());
             //std::cout << "PositionX : " << std::setprecision(15) << ilens->position.x << std::endl;
         }
         else if (!strcmp(second.c_str(), "y_centre") ||  // Get y center
 		 !strcmp(second.c_str(), "y_center") )
         {
             ilens->position.y=atof(third.c_str());
         }
         else if ( !strcmp(second.c_str(), "ellipticitymass") || !strcmp(second.c_str(), "ellipticity") )  // Get ellipticity
         {
             ilens->ellipticity=atof(third.c_str());
             //ilens->ellipticity=ilens->ellipticity/3.;
         }
         else if (!strcmp(second.c_str(), "ellipticity_angle") || !strcmp(second.c_str(), "angle_pos"))  // Get ellipticity angle
         {
             ilens->ellipticity_angle=atof(third.c_str());
             ilens->ellipticity_angle *= DTR;
         }
         else if ( !strcmp(second.c_str(), "rcore") || !strcmp(second.c_str(), "core_radius"))  // Get core radius
         {
             ilens->rcore=atof(third.c_str());
         }
         else if (!strcmp(second.c_str(), "rcut") || !strcmp(second.c_str(), "cut_radius"))  // Get cut radius
         {
             ilens->rcut=atof(third.c_str());
         }
 		else if (!strcmp(second.c_str(), "NFW_rs") ||  // Get scale radius of NFW
 			 !strcmp(second.c_str(), "rscale"))
 	        {
 	            ilens->rscale=atof(third.c_str());
 	        }
 		else if (!strcmp(second.c_str(), "exponent") )  // Get exponent
 	        {
 	            ilens->exponent=atof(third.c_str());
 	        }
 		else if (!strcmp(second.c_str(), "alpha") )  // Get alpha
 	        {
 	            ilens->alpha=atof(third.c_str());
 	        }
 		else if (!strcmp(second.c_str(), "einasto_kappacritic") ||  // Get critical kappa 
 			 !strcmp(second.c_str(), "kappacritic"))
         {
             ilens->einasto_kappacritic=atof(third.c_str());
         }
         else if (!strcmp(second.c_str(), "z_lens"))  // Get redshift
         {
             ilens->z=atof(third.c_str());
         }
         else if (!strcmp(second.c_str(), "v_disp"))  // Get Dispersion velocity
         {
             ilens->vdisp=atof(third.c_str());
         }
         else if ( !strncmp(second.c_str(), "virial_mass", 6) ||  // Get virial mass
                   !strcmp(second.c_str(), "masse") ||
                   !strcmp(second.c_str(), "m200") ||
                   !strcmp(second.c_str(), "mass") )
         {
             ilens->weight=atof(third.c_str());
         }
 
 
     } // closes inner while loop
     
     //Calculate parameters like b0, potential ellipticity and anyother parameter depending on the profile
     module_readParameters_calculatePotentialparameter(ilens);
   
     i++; // Set counter to next potential
 
 
 
 
 }  // closes if loop
 
 }  // closes while loop
 
 if(i==0){
 	printf("Parameter potential not found in the file %s",infile.c_str());
 	exit(-1);
 	}
 if(i>1){
 	for(int j=1; j<i; j++)
 		{
 		if(lens[j].z!=lens[j-1].z) printf("Warning : Some halos have different redshifts ! They should be in the same plane (same redshift) !\n");
 		}
 	}
 
 }  // closes if(IN)
 
 IN.close();
 
 }
 
 /** @brief Finds the amount of potential of same types. Has to be updated when introducing a new type of potential.
 *
 */
 
 
 void read_potentialSOA_ntypes(std::string infile, int N_type[]){
 
 	int ind = 0;
 	std::string first, second, third, line1, line2;
 
     std::ifstream IN(infile.c_str(), std::ios::in);
     //First sweep throught the runmode file to find N_type (number of types)
     if ( IN )
     {
 	while(std::getline(IN,line1))
         {
 	    std::istringstream read1(line1); // create a stream for the line
 	    read1 >> first;
             if (!strncmp(first.c_str(), "potent", 6))  // Read in potential
 			{
                 while(std::getline(IN,line2))
                 {
                 	std::istringstream read2(line2);
                 	read2 >> second >> third;
             	if (strcmp(second.c_str(), "end") == 0)  // Move to next potential and initialize it
 					{
 						break; // Break while loop and move to next potential
 					}
 
             	if ( !strcmp(second.c_str(), "profil") ||  // Get profile
 						 !strcmp(second.c_str(), "profile") )
 					{
 					if(!strcmp(third.c_str(), "PIEMD") ||
 					   !strcmp(third.c_str(), "1") )
 					{
 						ind=atoi(third.c_str());
 						N_type[ind] += 1;
 					}
 
 					if(!strcmp(third.c_str(), "NFW") ||
 					   !strcmp(third.c_str(), "2") )
 					{
 						ind=atoi(third.c_str());
 						N_type[ind] += 1;
 					}
 					if(!strcmp(third.c_str(), "SIES") ||
 					   !strcmp(third.c_str(), "3") )
 					{
 						ind=atoi(third.c_str());
 						N_type[ind] += 1;
 					}
 					if(!strncmp(third.c_str(), "point", 5) ||
 					   !strcmp(third.c_str(), "4") )
 					{
 						ind=atoi(third.c_str());
 						N_type[ind] += 1;
 					}
 					if(!strcmp(third.c_str(), "SIE") ||
 					   !strcmp(third.c_str(), "5") )
 					{
 						ind=atoi(third.c_str());
 						N_type[ind] += 1;
 					}
 					if(!strcmp(third.c_str(), "8") )	//PIEMD
 					{
 						ind=atoi(third.c_str());
 						N_type[ind] += 1;
 					}
 					if(!strcmp(third.c_str(), "81") )	//PIEMD81
 					{
 						ind=atoi(third.c_str());
 						N_type[ind] += 1;
 						//std::cerr << "Type First: " << ind << std::endl;
 					}
 				}
                 }
 			}
 		}
     }
 
 	IN.close();
 	IN.clear();
 	IN.open(infile.c_str(), std::ios::in);
 
 }
 
 void module_readParameters_PotentialSOA_direct(std::string infile, Potential_SOA *lens_SOA, int nhalos, int npotfiles, cosmo_param cosmology){
 
 
 	double DTR=acos(-1.)/180.;	/* 1 deg in rad  = pi/180 */
 
 	double core_radius_kpc = 0.;
 	double cut_radius_kpc = 0.;
 
 	int N_type[100];
 	int Indice_type[100];
 	int ind;
 	Potential lens_temp;
 	//Init of lens_SOA
 	lens_SOA->name = 	new type_t[nhalos+npotfiles];
 	lens_SOA->type = 	new int[nhalos+npotfiles];
 	lens_SOA->position_x  = 	new type_t[nhalos+npotfiles];
 	lens_SOA->position_y = 		new type_t[nhalos+npotfiles];
 	lens_SOA->b0 = 	new type_t[nhalos+npotfiles];
 	lens_SOA->vdisp = 	new type_t[nhalos+npotfiles];
 	lens_SOA->ellipticity_angle = new type_t[nhalos+npotfiles];
 	lens_SOA->ellipticity = new type_t[nhalos+npotfiles];
 	lens_SOA->ellipticity_potential = new type_t[nhalos+npotfiles];
 	lens_SOA->rcore = 	new type_t[nhalos+npotfiles];
 	lens_SOA->rcut = 	new type_t[nhalos+npotfiles];
 	lens_SOA->z = 		new type_t[nhalos+npotfiles];
 	lens_SOA->anglecos = 	new type_t[nhalos+npotfiles];
 	lens_SOA->anglesin = 		new type_t[nhalos+npotfiles];
 
 	lens_SOA->mag = 	new type_t[nhalos+npotfiles];
 	lens_SOA->lum = 		new type_t[nhalos+npotfiles];
 	lens_SOA->weight = 	new type_t[nhalos+npotfiles];
 	lens_SOA->exponent = 		new type_t[nhalos+npotfiles];
 	lens_SOA->einasto_kappacritic = 		new type_t[nhalos+npotfiles];
 	lens_SOA->rscale = 		new type_t[nhalos+npotfiles];
 	lens_SOA->alpha = 		new type_t[nhalos+npotfiles];
 	lens_SOA->theta = 		new type_t[nhalos+npotfiles];
 
 
 
 
 	//Init of N_types and Indice_type (Number of lenses of a certain type)
 	for(int i=0;i < 100; ++i){
 		N_type[i] = 0;
 		Indice_type[i] = 0;
 	}
 
 
 
     //First sweep through the runmode file to find N_type (number of types)
     read_potentialSOA_ntypes(infile,N_type);
 
     //Calcuting starting points for each type in lens array
 	for(int i=1;i < 100; ++i){
 		Indice_type[i] = N_type[i]+Indice_type[i-1];
 		//printf("%d %d \n ",N_type[i], Indice_type[i]);
 	}
 
     std::string first, second, third, line1, line2;
     std::ifstream IN(infile.c_str(), std::ios::in);
 	if(IN){
 	while(std::getline(IN,line1))
         {
 	    std::istringstream read1(line1); // create a stream for the line
 	    read1 >> first;
             if (!strncmp(first.c_str(), "potent", 6))  // Read in potential
 		{
 			lens_temp.position.x = lens_temp.position.y = 0.;
 			lens_temp.ellipticity = 0;
 			lens_temp.ellipticity_potential = 0.;
 			lens_temp.ellipticity_angle = 0.;
 			lens_temp.vdisp = 0.;
 			lens_temp.rcut = 0.;
 			lens_temp.rcore = 0;
 			core_radius_kpc = 0.;
 			cut_radius_kpc = 0;
 			lens_temp.weight = 0;
 			lens_temp.rscale = 0;
 			lens_temp.exponent = 0;
 			lens_temp.alpha = 0.;
 			lens_temp.einasto_kappacritic = 0;
 			lens_temp.z = 0;
 			while(std::getline(IN,line2))
 			{
 				//Init temp potential
 
 
 
 				std::istringstream read2(line2);
 				read2 >> second >> third;
 				//std::cerr << line2 << std::endl;
 				if (strcmp(second.c_str(), "end") == 0)  // Move to next potential and initialize it
 				{
 					if ( lens_temp.z == 0. )  // Check if redshift from current halo was initialized
 						{
 							fprintf(stderr, "ERROR: No redshift defined for potential at position x: %f and y: %f \n", lens_temp.position.x , lens_temp.position.y);
 							exit(-1);
 						}
 					break; // Break while loop and move to next potential
 				}
 
 				//Find profile
 				if ( !strcmp(second.c_str(), "profil") ||  // Get profile
 					 !strcmp(second.c_str(), "profile") )
 				{
 					lens_temp.type=atoi(third.c_str());
 					//std::cerr << lens_temp.type << std::endl;
 				}
 
 				else if (!strcmp(second.c_str(), "name"))    // Get name of lens
 				{
 					sscanf(third.c_str(),"%s",lens_temp.name);
 				}
 
 				else if (!strcmp(second.c_str(), "x_centre") ||  // Get x center
 				 !strcmp(second.c_str(), "x_center") )
 				{
 					lens_temp.position.x=atof(third.c_str());
 					//std::cout << "PositionX : " << std::setprecision(15) << lens_temp.position.x << std::endl;
 				}
 				else if (!strcmp(second.c_str(), "y_centre") ||  // Get y center
 				 !strcmp(second.c_str(), "y_center") )
 				{
 					lens_temp.position.y=atof(third.c_str());
 				}
 				else if ( !strcmp(second.c_str(), "ellipticitymass") || !strcmp(second.c_str(), "ellipticity") || !strcmp(second.c_str(), "ellipticite") )  // Get ellipticity
 				{
 					lens_temp.ellipticity=atof(third.c_str());
 					//lens_temp.ellipticity=lens_temp.ellipticity/3.;
 				}
 				else if (!strcmp(second.c_str(), "ellipticity_angle") || !strcmp(second.c_str(), "angle_pos"))  // Get ellipticity angle
 				{
 					lens_temp.ellipticity_angle=atof(third.c_str());
 					lens_temp.ellipticity_angle *= DTR;
 				}
 				else if ( !strcmp(second.c_str(), "rcore") || !strcmp(second.c_str(), "core_radius"))  // Get core radius
 				{
 					lens_temp.rcore=atof(third.c_str());
 				}
 				else if (!strcmp(second.c_str(), "rcut") || !strcmp(second.c_str(), "cut_radius"))  // Get cut radius
 				{
 					lens_temp.rcut=atof(third.c_str());
 				}
 				else if ( !strcmp(second.c_str(), "core_radius_kpc"))  // Get core radius
 				{
 					core_radius_kpc=atof(third.c_str());
 				}
 				else if (!strcmp(second.c_str(), "cut_radius_kpc"))  // Get cut radius
 				{
 					cut_radius_kpc=atof(third.c_str());
 				}
 				else if (!strcmp(second.c_str(), "NFW_rs") ||  // Get scale radius of NFW
 					 !strcmp(second.c_str(), "rscale"))
 					{
 						lens_temp.rscale=atof(third.c_str());
 					}
 				else if (!strcmp(second.c_str(), "exponent") )  // Get exponent
 					{
 						lens_temp.exponent=atof(third.c_str());
 					}
 				else if (!strcmp(second.c_str(), "alpha") )  // Get alpha
 					{
 						lens_temp.alpha=atof(third.c_str());
 					}
 				else if (!strcmp(second.c_str(), "einasto_kappacritic") ||  // Get critical kappa
 					 !strcmp(second.c_str(), "kappacritic"))
 				{
 					lens_temp.einasto_kappacritic=atof(third.c_str());
 				}
 				else if (!strcmp(second.c_str(), "z_lens"))  // Get redshift
 				{
 					lens_temp.z=atof(third.c_str());
 					//std::cerr << lens_temp.z << std::endl;
 				}
 				else if (!strcmp(second.c_str(), "v_disp"))  // Get Dispersion velocity
 				{
 					lens_temp.vdisp=atof(third.c_str());
 					//std::cerr << "vdisp : "<< third << " " << lens_temp.vdisp << std::endl;
 				}
 				else if ( !strncmp(second.c_str(), "virial_mass", 6) ||  // Get virial mass
 						  !strcmp(second.c_str(), "masse") ||
 						  !strcmp(second.c_str(), "m200") ||
 						  !strcmp(second.c_str(), "mass") )
 				{
 					lens_temp.weight=atof(third.c_str());
 				}
 
 
 
 			} // closes inner while loop
 
 	// Converting distance in kpc to arcsec.
 	double d1 = d0 / cosmology.h * module_cosmodistances_observerObject(lens_temp.z,cosmology);
-	printf(" D1 HPC : %f %f %f %f\n",d1, d0,cosmology.h,lens_temp.z );
+	//printf(" D1 HPC : %f %f %f %f\n",d1, d0,cosmology.h,lens_temp.z );
 	// Set rcore value in kpc or in arcsec.
 	if ( core_radius_kpc != 0. )
 		lens_temp.rcore = core_radius_kpc / d1;
 	else
 		core_radius_kpc = lens_temp.rcore * d1;
 
 	// Set rcut value in kpc or in arcsec.
 	if ( cut_radius_kpc != 0. )
 	{
 		//std::cerr << "d1 " << d1 << std::endl;
 		lens_temp.rcut = cut_radius_kpc / d1;}
 	else
 		cut_radius_kpc = lens_temp.rcut * d1;
 
     //Calculate parameters like b0, potential ellipticity and anyother parameter depending on the profile
     module_readParameters_calculatePotentialparameter(&lens_temp);
 
     //assign value to SOA
     //std::cerr << "Type + indice :" << lens_temp.type << Indice_type[lens_temp.type-1] << std::endl;
 	if(Indice_type[lens_temp.type-1] <nhalos){
 
 		ind = Indice_type[lens_temp.type-1];
 		//std::cerr<< ind << std::endl;
 		lens_SOA->type[ind] 		     = lens_temp.type;
 		lens_SOA->position_x[ind]  	     = lens_temp.position.x;
 		lens_SOA->position_y[ind] 	     = lens_temp.position.y;
 		lens_SOA->b0[ind] 		     = 		lens_temp.b0;
 		lens_SOA->vdisp[ind] 		     = 		lens_temp.vdisp;
 		lens_SOA->ellipticity_angle[ind]     = lens_temp.ellipticity_angle;
 		lens_SOA->ellipticity[ind]           = lens_temp.ellipticity;
 		lens_SOA->ellipticity_potential[ind] = lens_temp.ellipticity_potential;
 		lens_SOA->rcore[ind] 		     = lens_temp.rcore;
 		lens_SOA->rcut[ind] 		     = lens_temp.rcut;
 		lens_SOA->z[ind] 		     = lens_temp.z;
 		lens_SOA->anglecos[ind] 	     = cos(lens_temp.ellipticity_angle);
 		lens_SOA->anglesin[ind] 	     = sin(lens_temp.ellipticity_angle);
 
 		Indice_type[lens_temp.type-1] += 1;
 	}
 }  // closes if loop
 
 }  // closes while loop
 	}
 	IN.close();
 
 }
 
 /** @brief This module function converts potentials to potentieal_SOA (AOS to SOA conversion)
 *
 */
 
 void module_readParameters_PotentialSOA(std::string infile, Potential *lens, Potential_SOA *lens_SOA, int nhalos){
 
 	lens_SOA->type = 	new int[nhalos];
 	lens_SOA->position_x  = 	new type_t[nhalos];
 	lens_SOA->position_y = 		new type_t[nhalos];
 	lens_SOA->b0 = 	new type_t[nhalos];
 	lens_SOA->ellipticity_angle = new type_t[nhalos];
 	lens_SOA->ellipticity = new type_t[nhalos];
 	lens_SOA->ellipticity_potential = new type_t[nhalos];
 	lens_SOA->rcore = 	new type_t[nhalos];
 	lens_SOA->rcut = 	new type_t[nhalos];
 	lens_SOA->z = 		new type_t[nhalos];
 	lens_SOA->anglecos = 	new type_t[nhalos];
 	lens_SOA->anglesin = 		new type_t[nhalos];
 
 	int N_type[100];
 	int Indice_type[100];
 	int ind;
 
 	for(int i=0;i < 100; ++i){
 			N_type[i] = 0;
 			Indice_type[i] = 0;
 	}
 
 	for (int i = 0; i < nhalos; ++i){
 		N_type[lens[i].type] += 1;
 	}
 	for(int i=1;i < 100; ++i){
 		Indice_type[i] = N_type[i]+Indice_type[i-1];
 		//printf("%d %d \n ",N_type[i], Indice_type[i]);
 	}
 
 
 	for (int i = 0; i < nhalos; ++i){
 
 		if(Indice_type[lens[i].type-1] <nhalos){
 
 			ind = Indice_type[lens[i].type-1];
 			//printf ("%d ", ind);
 			lens_SOA->type[ind] 		     = lens[i].type;
 			lens_SOA->position_x[ind]  	     = lens[i].position.x;
 			//std::cerr << lens_SOA[1].position_x[*i_point] << " " << lens[i].position.x  << std::endl;
 			lens_SOA->position_y[ind] 	     = lens[i].position.y;
 			lens_SOA->b0[ind] 		     = 		lens[i].b0;
 			lens_SOA->ellipticity_angle[ind]     = lens[i].ellipticity_angle;
 			lens_SOA->ellipticity[ind]           = lens[i].ellipticity;
 			lens_SOA->ellipticity_potential[ind] = lens[i].ellipticity_potential;
 			lens_SOA->rcore[ind] 		     = lens[i].rcore;
 			lens_SOA->rcut[ind] 		     = lens[i].rcut;
 			lens_SOA->z[ind] 		     = lens[i].z;
 			lens_SOA->anglecos[ind] 	     = cos(lens[i].ellipticity_angle);
 			lens_SOA->anglesin[ind] 	     = sin(lens[i].ellipticity_angle);
 
 			Indice_type[lens[i].type-1] += 1;
 		}
 	}
 
 	//printf("Bla anglecos = %f\n", lens_SOA->anglecos[0]);
 
 }
 
 /** @brief This module function calculates profile depended information like the impactparameter b0 and the potential ellipticity epot
  * 
  * @param lens: mass distribution for which to calculate parameters
 */
 
 void module_readParameters_calculatePotentialparameter(Potential *lens){
 
 	
 
 
 	switch (lens->type)
     {
 	
 	case(5): /*Elliptical Isothermal Sphere*/
 		//impact parameter b0
 		lens->b0 = 4* pi_c2 * lens->vdisp * lens->vdisp ;
 		//ellipticity_potential 
 		lens->ellipticity_potential = lens->ellipticity/3 ;
 	    break;
 	    
 	case(8): /* PIEMD */
 		//impact parameter b0
 		lens->b0 = 6.*pi_c2 * lens->vdisp * lens->vdisp;
 		//ellipticity_parameter
 	    if ( lens->ellipticity == 0. && lens->ellipticity_potential != 0. ){
 			// emass is (a2-b2)/(a2+b2)
 			lens->ellipticity = 2.*lens->ellipticity_potential / (1. + lens->ellipticity_potential * lens->ellipticity_potential);
 			//printf("1 : %f %f \n",lens->ellipticity,lens->ellipticity_potential);
 		}
 		else if ( lens->ellipticity == 0. && lens->ellipticity_potential == 0. ){
 			lens->ellipticity_potential = 0.00001;
 			//printf("2 : %f %f \n",lens->ellipticity,lens->ellipticity_potential);
 		}
 		else{
 			// epot is (a-b)/(a+b)
 			lens->ellipticity_potential = (1. - sqrt(1 - lens->ellipticity * lens->ellipticity)) / lens->ellipticity;
 			//printf("3 : %f %f \n",lens->ellipticity,lens->ellipticity_potential);
 		}
         break;
 
 	case(81): /* PIEMD */
 		//impact parameter b0
 		lens->b0 = 6.*pi_c2 * lens->vdisp * lens->vdisp;
 		//ellipticity_parameter
 	    if ( lens->ellipticity == 0. && lens->ellipticity_potential != 0. ){
 			// emass is (a2-b2)/(a2+b2)
 			lens->ellipticity = 2.*lens->ellipticity_potential / (1. + lens->ellipticity_potential * lens->ellipticity_potential);
-			printf("1 : %f %f \n",lens->ellipticity,lens->ellipticity_potential);
+			//printf("1 : %f %f \n",lens->ellipticity,lens->ellipticity_potential);
 		}
 		else if ( lens->ellipticity == 0. && lens->ellipticity_potential == 0. ){
 			lens->ellipticity_potential = 0.00001;
-			printf("2 : %f %f \n",lens->ellipticity,lens->ellipticity_potential);
+			//printf("2 : %f %f \n",lens->ellipticity,lens->ellipticity_potential);
 		}
 		else{
 			// epot is (a-b)/(a+b)
 			lens->ellipticity_potential = (1. - sqrt(1 - lens->ellipticity * lens->ellipticity)) / lens->ellipticity;
-			printf("3 : %f %f \n",lens->ellipticity,lens->ellipticity_potential);
+			//printf("3 : %f %f \n",lens->ellipticity,lens->ellipticity_potential);
 		}
         break;
 
 	default:
             printf( "ERROR: LENSPARA profil type of clump %s unknown : %d\n",lens->name, lens->type);
 		break;
     };
 	
 }
 
 void module_readParameters_calculatePotentialparameter_SOA(Potential_SOA *lens, int ind){
 
 
 
 
 	switch (lens->type[ind])
     {
 
 	case(5): /*Elliptical Isothermal Sphere*/
 		//impact parameter b0
 		lens->b0[ind] = 4* pi_c2 * lens->vdisp[ind] * lens->vdisp[ind] ;
 		//ellipticity_potential
 		lens->ellipticity_potential[ind] = lens->ellipticity[ind]/3 ;
 	    break;
 
 	case(8): /* PIEMD */
 		//impact parameter b0
 		lens->b0[ind] = 6.*pi_c2 * lens->vdisp[ind] * lens->vdisp[ind];
 		//ellipticity_parameter
 	    if ( lens->ellipticity[ind] == 0. && lens->ellipticity_potential[ind] != 0. ){
 			// emass is (a2-b2)/(a2+b2)
 			lens->ellipticity[ind] = 2.*lens->ellipticity_potential[ind] / (1. + lens->ellipticity_potential[ind] * lens->ellipticity_potential[ind]);
 			//printf("1 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]);
 		}
 		else if ( lens->ellipticity[ind] == 0. && lens->ellipticity_potential[ind] == 0. ){
 			lens->ellipticity_potential[ind] = 0.00001;
 			//printf("2 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]);
 		}
 		else{
 			// epot is (a-b)/(a+b)
 			lens->ellipticity_potential[ind] = (1. - sqrt(1 - lens->ellipticity[ind] * lens->ellipticity[ind])) / lens->ellipticity[ind];
 			//printf("3 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]);
 		}
         break;
 
 	case(81): /* PIEMD */
 		//impact parameter b0
 		lens->b0[ind] = 6.*pi_c2 * lens->vdisp[ind] * lens->vdisp[ind];
 		//ellipticity_parameter
 	    if ( lens->ellipticity[ind] == 0. && lens->ellipticity_potential[ind] != 0. ){
 			// emass is (a2-b2)/(a2+b2)
 			lens->ellipticity[ind] = 2.*lens->ellipticity_potential[ind] / (1. + lens->ellipticity_potential[ind] * lens->ellipticity_potential[ind]);
 			//printf("1 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]);
 		}
 		else if ( lens->ellipticity[ind] == 0. && lens->ellipticity_potential[ind] == 0. ){
 			lens->ellipticity_potential[ind] = 0.00001;
 			//printf("2 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]);
 		}
 		else{
 			// epot is (a-b)/(a+b)
 			lens->ellipticity_potential[ind] = (1. - sqrt(1 - lens->ellipticity[ind] * lens->ellipticity[ind])) / lens->ellipticity[ind];
 			//printf("3 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]);
 		}
         break;
 
 	default:
             printf( "ERROR: LENSPARA profil type of clump %f unknown : %d N %d \n",lens->name[ind], lens->type[ind], ind);
 		break;
     };
 
 }
 
 
 
 /** @brief This module function reads the grid information
  * 
 * @param infile path to input file
 * @param grid array where grid information will be stored
 */
 
 void module_readParameters_Grid(std::string infile, grid_param *grid)
 {
 
     std::string first, second, third, line1, line2;
     double cast_1;
     double dmax ;
 
 std::ifstream IN(infile.c_str(), std::ios::in);
     if ( IN )
     {
 	while(std::getline(IN,line1))
         {
 	    std::istringstream read1(line1); // create a stream for the line
 	    read1 >> first;
             if (!strncmp(first.c_str(), "grille", 7) || !strncmp(first.c_str(), "frame", 5) || !strncmp(first.c_str(), "champ", 5))  // Read in potential
 		{
 
     while(std::getline(IN,line2))
     {
     	std::istringstream read2(line2);
     	read2 >> second >> third;
     	if (strcmp(second.c_str(), "end") == 0)  // Move to next potential and initialize it
         {
             break; // Break while loop and move to next potential
         }
     
  	// Read in values
  	
         if (!strcmp(second.c_str(), "dmax"))
         {
             sscanf(third.c_str(), "%lf", &dmax);
             //printf( "\t%s\t%lf\n", second.c_str(), dmax);
             //grid->dmax = (type_t) cast_1;
             grid->xmin = -dmax;
             grid->xmax = dmax;
             grid->ymin = -dmax;
             grid->ymax = dmax;
             grid->rmax = dmax * sqrt(2.);
         }
         else if (!strcmp(second.c_str(), "xmin"))
         {
             sscanf(third.c_str(), "%lf", &cast_1);
             grid->xmin = (type_t) cast_1;
             //printf("\t%s\t%lf\n", second.c_str(), grid->xmin);
         }
         else if (!strcmp(second.c_str(), "xmax"))
         {
             sscanf(third.c_str(), "%lf", &cast_1);
             grid->xmax = (type_t) cast_1;
             //printf("\t%s\t%lf\n", second.c_str(), grid->xmax);
         }
         else if (!strcmp(second.c_str(), "ymin"))
         {
             sscanf(third.c_str(), "%lf", &cast_1);
             grid->ymin = (type_t) cast_1;
             //printf( "\t%s\t%lf\n", second.c_str(), grid->ymin);
         }
         else if (!strcmp(second.c_str(), "ymax"))
         {
             sscanf(third.c_str(), "%lf", &cast_1);
             grid->ymax = (type_t) cast_1;
             //printf( "\t%s\t%lf\n", second.c_str(), grid->ymax);
         }
 }
 
 }  // closes if loop
 
 }  // closes while loop
 
 }  // closes if(IN)
 
 
 
 IN.close();
 
 }
 
 
 /* @brief Get number of sets for cleanlens mode
  * !!Not used. Will be reworked!!
  * 
  * This module function reads in the "clean lens" mode sources: These sources are read in and lensed only once assuming a fixed mass model
 // Then we can see the predicted location of the images and compare with their real positions
 * 
 * @param clean lens file, number of clean lens images
 * @return coordinates for each image, shape of each image, redshifts, number of sets, number of images per set 
 */
 
 
 // Get number of sets for cleanlens mode
 void module_readParameters_SingleLensingSourcesNumberSets(std::string infile, int &nsetofimages_cleanlens )
 {
     std::string line1;
     int setNumber=0;
     nsetofimages_cleanlens=0;
 
 
     // Get number of sets of images
     std::ifstream IM(infile.c_str(),std::ios::in);
 	if ( IM )
     	{
         	while( std::getline(IM,line1))
         	{ 
                         // Read in parameters
 			sscanf(line1.c_str(), "%d", &setNumber);
 			if(setNumber > nsetofimages_cleanlens) nsetofimages_cleanlens = setNumber;
 		}
 	}
 	IM.close();
 }
 
 
 
 /** @brief Prints out cosmology
 */
 
 void module_readParameters_debug_cosmology(int DEBUG, cosmo_param cosmology)
 {
 
 if (DEBUG == 1)  // If we are in debug mode
 {
 	printf("\n\n####################### Summary of Input Parameters #######################\n");
     printf("Cosmology: Cosmology model = %d, omegaM = %lf, omegaX = %lf, curvature = %lf, wX = %lf, wa = %lf, H0 =  %lf, h = %lf\n", cosmology.model, cosmology.omegaM, cosmology.omegaX, cosmology.curvature, cosmology.wX, cosmology.wa, cosmology.H0, cosmology.h);
 	
 }
 
 }
 
 /** @brief Prints out runmode
 */
 
 void module_readParameters_debug_runmode(int DEBUG, runmode_param runmode)
 {
 if (DEBUG == 1)  // If we are in debug mode
 {
 
     printf("Runmode: nhalos = %d, nsets = %d, nimagestot = %d, source = %d, image = %d, arclet  = %d, cline = %d, inverse= %d, multi= %d DEBUG = %d\n", runmode.nhalos, runmode.nsets, runmode.nimagestot, runmode.source, runmode.image, runmode.arclet, runmode.cline, runmode.inverse, runmode.multi, runmode.debug);
 
 }
 
 }
 /** @brief Prints out images
 */
 void module_readParameters_debug_image(int DEBUG, galaxy image[], int nImagesSet[], int nsets)
 {
 if (DEBUG == 1)  // If we are in debug mode
 {
 	int index = 0;
 	for ( int i = 0; i < nsets; ++i){
 		for( int j = 0; j < nImagesSet[i]; ++j){
 			printf("Image [%d]: x = %lf, y = %lf, shape: a = %f, b = %f, theta = %lf, redshift = %lf,  nImagesSet = %d,\n",index , image[index].center.x, image[index].center.y,  image[index].shape.a, image[index].shape.b, image[index].shape.theta, image[index].redshift,  nImagesSet[i]);
 			index +=1;
 			//printf( "%d \n", index );
 		}
 	}
 }
 
 }
 /** @brief Prints out source
 */
 void module_readParameters_debug_source(int DEBUG, galaxy source[], int nsets)
 {
 if (DEBUG == 1)  // If we are in debug mode
 {
 	for ( int i = 0; i < nsets; ++i){
 		printf("Source[%d]: x = %lf, y = %lf, shape: a = %f, b = %f, theta = %lf, redshift = %lf. \n\n", i, source[i].center.x, source[i].center.y,  source[i].shape.a, source[i].shape.b, source[i].shape.theta, source[i].redshift);
 	}
 }
 
 }
 /** @brief Prints out massdistributions
 */
 void module_readParameters_debug_potential(int DEBUG, Potential lenses[], int nhalos)
 {
 if (DEBUG == 1)  // If we are in debug mode
 {
 	for ( int i = 0; i < nhalos; ++i){
 		printf("Potential[%d]: x = %f, y = %f, vdisp = %f, type = %d, type_name = %s, name = %s,\n \t ellipticity = %f, ellipticity_pot = %f, ellipticity angle (radians) = %f, rcore = %f, rcut = %f,\n \t rscale = %f, exponent = %f, alpha = %f, einasto kappa critical = %f, z = %f\n", i,lenses[i].position.x, lenses[i].position.y, lenses[i].vdisp, lenses[i].type, lenses[i].type_name, lenses[i].name, lenses[i].ellipticity, lenses[i].ellipticity_potential, lenses[i].ellipticity_angle, lenses[i].rcore, lenses[i].rcut, lenses[i].rscale, lenses[i].exponent, lenses[i].alpha, lenses[i].einasto_kappacritic, lenses[i].z);
 	}
 }
 
 }
 
 void module_readParameters_debug_potential_SOA(int DEBUG, Potential_SOA lenses, int nhalos)
 {
 if (DEBUG == 1)  // If we are in debug mode
 {
 	for ( int i = 0; i < nhalos; ++i){
 		printf("Potential[%d]: x = %f, y = %f, b0 = %f, vdisp = %f, type = %d, \n \t ellipticity = %f, ellipticity_pot = %f, ellipticity angle (radians) = %f, rcore = %f, rcut = %f,\n \t z = %f\n, angle cos %f, sin %f \n", i,lenses.position_x[i], lenses.position_y[i], lenses.b0[i],lenses.vdisp[i], lenses.type[i], lenses.ellipticity[i], lenses.ellipticity_potential[i], lenses.ellipticity_angle[i], lenses.rcore[i], lenses.rcut[i], lenses.z[i], lenses.anglecos[i], lenses.anglesin[i]);
 	}
 }
 
 }
 
 /** @brief Prints out potfile_param
 */
 void module_readParameters_debug_potfileparam(int DEBUG, potfile_param *potfile)
 {
 if (DEBUG == 1)  // If we are in debug mode
 {
 
 	printf("Potfile: potid = %d, filename = %s, ftype = %d, type = %d, zlens = %f, mag0 = %f, sigma = %f,\n \t core = %f, corekpc = %f, ircut = %d, cut1 = %f, cut2 = %f,\n \t cutkpc1 = %f, cutkpc2 = %f, islope = %d, slope1 = %f, slope2 = %f\n",    potfile->potid,potfile->potfile,potfile->ftype,potfile->type,potfile->zlens,potfile->mag0,potfile->sigma,potfile->core,potfile->corekpc,potfile->ircut,potfile->cut1,potfile->cut2,potfile->cutkpc1,potfile->cutkpc2,potfile->islope,potfile->slope1,potfile->slope2);
 }
 
 }
 /** @brief Prints out cline
 */
 void module_readParameters_debug_criticcaustic(int DEBUG, cline_param cline)
 {
 if (DEBUG == 1)  // If we are in debug mode
 {
 
     printf("Cline: Number of planes = %d ", cline.nplan);
     for( int i=0; i < cline.nplan; ++i){
 		printf(" z[%d] = %f, ",i ,cline.cz[i]);
 		}  
 	printf(" dmax = %f, Low = %f, High= %f, Nb of gridcells %d \n", cline.dmax , cline.limitLow ,cline.limitHigh, cline.nbgridcells);
 
 }
 
 }
 /** @brief Prints out limits
 */
 void module_readParameters_debug_limit(int DEBUG, struct potentialoptimization host_potentialoptimization)
 {
 if (DEBUG == 1)  // If we are in debug mode
 {
 
     printf("DEBUG: Center.x B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.position.x.block, host_potentialoptimization.position.x.min, host_potentialoptimization.position.x.max, host_potentialoptimization.position.x.sigma);
     printf("DEBUG: Center.y B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.position.y.block, host_potentialoptimization.position.y.min, host_potentialoptimization.position.y.max, host_potentialoptimization.position.y.sigma);
     printf("DEBUG: weight.y B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.weight.block, host_potentialoptimization.weight.min, host_potentialoptimization.weight.max, host_potentialoptimization.weight.sigma);
     printf("DEBUG: ellipticity_angle B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.ellipticity_angle.block, host_potentialoptimization.ellipticity_angle.min, host_potentialoptimization.ellipticity_angle.max, host_potentialoptimization.ellipticity_angle.sigma);
     printf("DEBUG: ellipticity B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.ellipticity.block, host_potentialoptimization.ellipticity.min, host_potentialoptimization.ellipticity.max, host_potentialoptimization.ellipticity.sigma);
     printf("DEBUG: ellipticity_potential B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.ellipticity_potential.block, host_potentialoptimization.ellipticity_potential.min, host_potentialoptimization.ellipticity_potential.max, host_potentialoptimization.ellipticity_potential.sigma);
     printf("DEBUG: rcore B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.rcore.block, host_potentialoptimization.rcore.min, host_potentialoptimization.rcore.max, host_potentialoptimization.rcore.sigma);
     printf("DEBUG: rcut B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.rcut.block, host_potentialoptimization.rcut.min, host_potentialoptimization.rcut.max, host_potentialoptimization.rcut.sigma);
     printf("DEBUG: rscale B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.rscale.block, host_potentialoptimization.rscale.min, host_potentialoptimization.rscale.max, host_potentialoptimization.rscale.sigma);
     printf("DEBUG: exponent B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.exponent.block, host_potentialoptimization.exponent.min, host_potentialoptimization.exponent.max, host_potentialoptimization.exponent.sigma);
     printf("DEBUG: vdisp B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.vdisp.block, host_potentialoptimization.vdisp.min, host_potentialoptimization.vdisp.max, host_potentialoptimization.vdisp.sigma);
     printf("DEBUG: alpha B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.alpha.block, host_potentialoptimization.alpha.min, host_potentialoptimization.alpha.max, host_potentialoptimization.alpha.sigma);
     printf("DEBUG: z B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.z.block, host_potentialoptimization.z.min, host_potentialoptimization.z.max, host_potentialoptimization.z.sigma);
             
 }
 
 }