diff --git a/Benchmarks/ChiBenchmark/chi_CPU.cpp b/Benchmarks/ChiBenchmark/chi_CPU.cpp
index a06e5f2..b7f660b 100644
--- a/Benchmarks/ChiBenchmark/chi_CPU.cpp
+++ b/Benchmarks/ChiBenchmark/chi_CPU.cpp
@@ -1,827 +1,828 @@
 #include <iostream>
 #include <string.h>
 //#include <cuda_runtime.h>
 #include <math.h>
 #include <sys/time.h>
 #include <fstream>
 #include <unistd.h>
 #include <assert.h>
 
 
 #ifndef __xlC__
 #warning "gnu compilers"
 #include <immintrin.h>
 #endif
 
 //#include "simd_math.h"
 #include "chi_CPU.hpp"
 //#ifdef __WITH_GPU
 #include "grid_gradient_GPU.cuh"
 ////#endif
 //#include "gradient_GPU.cuh"
 #ifdef _OPENMP
 #include <omp.h>
 #endif
 #ifdef __WITH_MPI
 #include<mpi.h>
 #include"mpi_check.h"
 #endif
 
 
 #define MIN(a,b) (((a)<(b))?(a):(b))
 #define MAX(a,b) (((a)>(b))?(a):(b))
 
 
 double myseconds()
 {
         struct timeval  tp;
         struct timezone tzp;
         int i;
 
         i = gettimeofday(&tp,&tzp);
         return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 );
 }
 
 
 void mychi_bruteforce_SOA_CPU_grid_gradient(double *chi, int *error, runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, galaxy *images)
 {
 	//
 	//double dx, dy; //, x_pos, y_pos;        //pixelsize
 	//	
 //	double im_dist[MAXIMPERSOURCE]; // distance to a real image for an "theoretical" image found from a theoretical source
 //	int im_index;       		// index of the closest real image for an image found from a theoretical source
 //	int second_closest_id;   	// index of the second closest real image for an image found from a theoretical source
 //	int thread_found_image = 0;   	// at each iteration in the lens plane, turn to 1 whether the thread find an image
 // 	struct point im_position, temp; // position of the image found from a theoretical source + temp variable for comparison
 //	struct triplet Tsup, Tinf, Tsupsource, Tinfsource;// triangles for the computation of the images created by the theoretical sources
 	struct galaxy sources[runmode->nsets][runmode->nimagestot]; // theoretical sources (common for a set)
 	int    nimagesfound  [runmode->nsets][runmode->nimagestot][MAXIMPERSOURCE]; // number of images found from the theoretical sources
 	int    locimagesfound[runmode->nsets][runmode->nimagestot];
 	struct point image_pos [runmode->nsets][runmode->nimagestot][MAXIMPERSOURCE];
 	//double image_dist    [runmode->nsets][runmode->nimagestot][MAXIMPERSOURCE];
 	struct point tim     [runmode->nimagestot][MAXIMPERSOURCE]; // theoretical images (computed from sources)
 	//
 	int world_size = 1;
 	int world_rank = 0;
 #ifdef __WITH_MPI
 	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
 	MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
 	MPI_Barrier(MPI_COMM_WORLD);
 #endif
 	unsigned int verbose = (world_rank == 0);
 	//
 	int grid_size     = runmode->nbgridcells;
 	int loc_grid_size = runmode->nbgridcells/world_size;
 	//
 	double y_len      = fabs(frame->ymax - frame->ymin);
         int    y_len_loc  = runmode->nbgridcells/world_size;
         int    y_pos_loc  = (int) world_rank*y_len_loc;
 	int    y_bound    = y_len_loc;
 	if ((world_rank + 1) != world_size) y_bound++;
 	//
 	//
 	const double dx   = (frame->xmax - frame->xmin)/(runmode->nbgridcells - 1);
 	const double dy   = (frame->ymax - frame->ymin)/(runmode->nbgridcells - 1);	
 	//
 	double *grid_gradient_x, *grid_gradient_y;
 	//
 	//grid_gradient_x   = (double *)malloc((int) grid_size*loc_grid_size*sizeof(double));
 	//grid_gradient_y   = (double *)malloc((int) grid_size*loc_grid_size*sizeof(double));
 	grid_gradient_x   = (double *)malloc((int) grid_size*y_bound*sizeof(double));
 	grid_gradient_y   = (double *)malloc((int) grid_size*y_bound*sizeof(double));
 	//
 	//uais
         //if (verbose) printf("@@%d: nsets = %d nimagestot = %d maximgpersource = %d, grid_size = %d, loc_grid_size = %d, y_pos_loc = %d\n", world_rank, runmode->nsets, runmode->nimagestot, MAXIMPERSOURCE, grid_size, loc_grid_size, y_pos_loc);
 	//
 	const int grid_dim = runmode->nbgridcells;
 	//Packaging the image to sourceplane conversion
 	double time = -myseconds();
 	//gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_dim);
 #ifdef __WITH_GPU
 	gradient_grid_GPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, 0, y_pos_loc);
 	//gradient_grid_GPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_size);
 #else
-	gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, 0, y_pos_loc);
+	int zero = 0;
+	gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, zero, y_pos_loc);
 #endif
 
 #if 0
 	if (world_rank == 0)
 	for (int jj = 0; jj < loc_grid_size; ++jj)
 		for (int ii = 0; ii < runmode->nbgridcells; ++ii)
 			printf("%d %d: %f %f\n", ii, jj, grid_gradient_x[jj*grid_size + ii], grid_gradient_y[jj*grid_size + ii]);
 #endif
 
 	//gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_size, loc_grid_size);
 #ifdef __WITH_MPI
 	MPI_Barrier(MPI_COMM_WORLD);
 #endif
 	time += myseconds();
 	if (verbose) printf("	Gridgrad time = %f s.\n", time);
 	//
 	int index          = 0;       // index tracks the image within the total image array in the image plane
 	*chi  		   = 0;
 	//
 	double chi2_time   = 0.;
 	double loop_time   = 0.;
 	double image_time  = 0.;
 	//
 	int images_found   = 0;
 	long int images_total   = 0;
 	//
         //printf("@@nsets = %d nx = %d ny = %d, xmin = %f, dx = %f, ymin = %f, dy = %f\n", runmode->nsets, runmode->nbgridcells, runmode->nbgridcells, frame->xmin, dx, frame->ymin, dy );
 	//
 	int numsets = 0;
 	//
 	for( int  source_id = 0; source_id < runmode->nsets; source_id ++)
 		numsets += nimages_strongLensing[source_id];
 	//printf("@@Total numsets = %d\n", numsets);
 	//
 	time = -myseconds();
 	//
 	// nsets     : number of images in the source plane
 	// nimagestot: number of images in the image plane
 	//
 	image_time -= myseconds();
 	//
 	unsigned int nsets = runmode->nsets;
 	for( int  source_id = 0; source_id < runmode->nsets; source_id ++)
 	{
 		// number of images in the image plane for the specific image (1,3,5...)
 		unsigned short int nimages = nimages_strongLensing[source_id];
 		//@@printf("@@ source_id = %d, nimages = %d\n",  source_id, nimages_strongLensing[source_id]);
 		//____________________________ image (constrains) loop ________________________________
 		for(unsigned short int image_id = 0; image_id < nimages; image_id++)
 		{
 			//printf("@@  nimages = %d\n",  nimages_strongLensing[source_id]);
 			//________ computation of theoretical sources _________
 			// output: sources[source_id].center
 			//printf("Image = %f %f\n", images[index + image_id].center.x, images[index + image_id].center.y);
 			mychi_transformImageToSourcePlane_SOA(runmode->nhalos, 
 							&images[index + image_id].center,
 							 images[index + image_id].dr, 
 							 lens, 
 							&sources[source_id][image_id].center);
 			//
 			// void mychi_transformImageToSourcePlane_SOA(const int Nlens, const struct point *image_point, double dlsds, const struct Potential_SOA *lens, struct point *source_point)
 			//
 			struct point Grad = module_potentialDerivatives_totalGradient_SOA(&images[index + image_id].center, lens, runmode->nhalos);
 			//printf("	image %d, %d (%d) = (%.15f, %.15f) -> (%.15f, %.15f)\n", source_id, image_id, nimages, images[index + image_id].center.x, images[index + image_id].center.y, sources[source_id].center.x, sources[source_id].center.y );
 			//printf("Grad.x = %f, %f\n", Grad.x, grid_gradient_x[images[index + image_id].center.x/dx]); 
 			//
 			// find the position of the constrain in the source plane
 			sources[source_id][image_id].center.x = images[index + image_id].center.x - images[index + image_id].dr*Grad.x;
                         sources[source_id][image_id].center.y = images[index + image_id].center.y - images[index + image_id].dr*Grad.y;
 			//
 			//time += myseconds(); 
 			//
 			sources[source_id][image_id].redshift = images[index + image_id].redshift;
 			//
 			sources[source_id][image_id].dr       = images[index + image_id].dr;
 			sources[source_id][image_id].dls      = images[index + image_id].dls;
 			sources[source_id][image_id].dos      = images[index + image_id].dos;
 #if 1
 		}
 		index += nimages; 
 	}
 #ifdef __WITH_MPI
         MPI_Barrier(MPI_COMM_WORLD);
 #endif
 	image_time += myseconds();
 	//
 	// main loop
 	//
 	//double y_len     = fabs(frame->ymax - frame->ymin);
 	//int    y_len_loc = runmode->nbgridcells/world_size; 
 	//int    y_pos_loc = (int) world_rank*y_len_loc;
 	//printf("%d out of %d: y_id = %d to %d\n", world_rank, world_size, y_pos_loc, y_pos_loc + y_len_loc - 1);
 	//fflush(stdout);
         //MPI_Barrier(MPI_COMM_WORLD);
 	//
 	loop_time    -= myseconds();
 	index 	      = 0;
 	int numimg    = 0;
 	//
 	for( int  source_id = 0; source_id < runmode->nsets; source_id ++)
 	{
 		// number of images in the image plane for the specific image (1,3,5...)
 		unsigned short int nimages = nimages_strongLensing[source_id];
 		//printf("@@ source_id = %d, nimages = %d\n",  source_id, nimages_strongLensing[source_id]);
 		//____________________________ image (constrains) loop ________________________________
 		for(unsigned short int image_id = 0; image_id < nimages; image_id++)
 		{
 #endif
 			//
 			//struct point image_pos [MAXIMPERSOURCE];
 			//
 			//MPI_Barrier(MPI_COMM_WORLD);
 			//if (verbose) printf("source = %d, image = %d\n", source_id, image_id);
 			//if (verbose) fflush(stdout);	
 			int loc_images_found = 0;
 			//#ifdef __WITH_MPI
 			//			MPI_Barrier(MPI_COMM_WORLD);
 			//#endif	
 #pragma omp parallel 
 #pragma omp for reduction(+: images_total) 
 			//for (int y_id = 0; y_id < (runmode->nbgridcells - 1); ++y_id )
 			//for (int y_id = 0; y_id < (y_len_loc - 1); ++y_id)
 			for (int y_id = 0; y_id < (y_bound - 1); ++y_id)
 				//for (int y_id = world_rank*y_pos_loc; y_id < (world_rank*y_pos_loc + y_len_loc - 1); ++y_id)
 			{
 				//for (int y_id = 0; (y_id < runmode->nbgridcells - 1) /*&& (loc_images_found != nimages)*/; ++y_id )
 				for (int x_id = 0; x_id < runmode->nbgridcells - 1 ; ++x_id)
 				{
 					//int yy_pos = MIN(y_pos_loc + y_id, runmode->nbgridcells - 1);
 					images_total++;
 					//
 					double x_pos = frame->xmin + (            x_id)*dx;
 					double y_pos = frame->ymin + (y_pos_loc + y_id)*dy;
 					//printf("%d: x_id = %d xpos = %f, y_id = %d ypos = %f\n", world_rank, x_id, x_pos, y_pos_loc + y_id, y_pos);
 					//
 					// Define the upper + lower triangle, both together = square = pixel
 					// 
 					struct triplet Tsup, Tinf;
 					//
 					Tsup.a.x = x_pos;
 					Tsup.b.x = x_pos;
 					Tsup.c.x = x_pos + dx;
 					Tinf.a.x = x_pos + dx;
 					Tinf.b.x = x_pos + dx;
 					Tinf.c.x = x_pos;
 					//
 					Tsup.a.y = y_pos;
 					Tsup.b.y = y_pos + dy;
 					Tsup.c.y = y_pos;
 					Tinf.a.y = y_pos + dy;
 					Tinf.b.y = y_pos;
 					Tinf.c.y = y_pos + dy;
 					//
 					// Lens to Sourceplane conversion of triangles
 					// 
 					// double time = -myseconds();
 					//
 					struct triplet Timage;
 					struct triplet Tsource;
 					//
 					//printf("	- Tsup = %f %f\n", Tsup.a.x, Tsup.a.y);
 					mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_upper(&Tsup, sources[source_id][image_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, y_id*grid_dim + x_id, grid_dim);
 					//mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_upper(&Tsup, sources[source_id][image_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, (y_pos_loc + y_id)*grid_dim + x_id, grid_dim);
 					//@@if (world_rank == 1)
 					//
 					int thread_found_image = 0;
 					//
 					if (mychi_insideborder(&sources[source_id][image_id].center, &Tsource) == 1)
 					{
 						//printf("	-> %d found: y_id = %d, x_id = %d : pixel %f %f -> %f %f\n", world_rank, y_pos_loc + y_id, x_id, Tsup.a.x, Tsup.a.y, Tsource.a.x, Tsource.a.y);
 						thread_found_image = 1;
 						Timage = Tsup;
 					}
 					else 
 					{
 						//printf("	- Tinf = %f %f\n", Tinf.a.x, Tinf.a.y);
 						mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_lower(&Tinf, sources[source_id][image_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, y_id*grid_dim + x_id, grid_dim);
 						//mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_lower(&Tinf, sources[source_id][image_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, (y_id + y_pos_loc)*grid_dim + x_id, grid_dim);
 						//@@if (world_rank == 1)
 						if (mychi_inside(&sources[source_id][image_id].center, &Tsource) == 1)
 						{
 							//printf("	-> %d found: y_id = %d, x_id = %d : pixel %f %f -> %f %f\n", world_rank, y_pos_loc + y_id, x_id, Tinf.a.x, Tinf.a.y, Tsource.a.x, Tsource.a.y);
 							thread_found_image = 1;
 							Timage = Tinf;
 						}
 					}
 #if 1
 					if (thread_found_image)
 					{
 #pragma omp critical
 						{
 							image_pos[source_id][image_id][loc_images_found] = mychi_barycenter(&Timage); // get the barycenter of the triangle
 							locimagesfound[source_id][image_id]++;
 							loc_images_found++;
 							numimg ++;
 						}
 					}
 #endif
 				}
 			}
 #if 1
 		}
 		index += nimages_strongLensing[source_id];
 	}
 #ifdef __WITH_MPI
 	MPI_Barrier(MPI_COMM_WORLD);
 #endif
 	loop_time += myseconds();
 	//
 	double comm_time = -myseconds();
 	//
 	int          numimagesfound    [runmode->nsets][runmode->nimagestot];
 	memset(&numimagesfound, 0, runmode->nsets*runmode->nimagestot*sizeof(int));
 	struct point imagesposition    [runmode->nsets][runmode->nimagestot][MAXIMPERSOURCE];
 	memset(&imagesposition, 0, runmode->nsets*runmode->nimagestot*MAXIMPERSOURCE*sizeof(point));
 	//
 	int          numimagesfound_tmp[runmode->nsets][runmode->nimagestot];
 	struct point imagesposition_tmp[runmode->nsets][runmode->nimagestot][MAXIMPERSOURCE];
 	//
 	memset(numimagesfound_tmp, 0, runmode->nsets*runmode->nimagestot*sizeof(int));
 	memset(imagesposition_tmp, 0, runmode->nsets*runmode->nimagestot*sizeof(int));
 	/*
 	//if (verbose)
 	{	
 	int image_sum = 0;
 	for (int ii = 0; ii < runmode->nsets; ++ii)
 	for (int jj = 0; jj < runmode->nimagestot; ++jj)
 	{
 	image_sum += locimagesfound[ii][jj];	
 	}
 	printf("%d: num images found = %d\n", world_rank, image_sum); 
 	}
 	MPI_Barrier(MPI_COMM_WORLD);
 	 */
 #ifdef __WITH_MPI
 	int total = 0;
 	//MPI_Reduce(&locimagesfound, &imagesfound, runmode->nsets*runmode->nimagestot, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
 	//MPI_Reduce(&locimagesfound, &imagesfound, runmode->nsets*runmode->nimagestot, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
 	//
 	if (!verbose)
 	{
 		MPI_CHECK(MPI_Send( &numimg        , 1                                                , MPI_INT   , 0, 666 + world_rank, MPI_COMM_WORLD ));
 		if (numimg != 0)
 		{
 			MPI_CHECK(MPI_Send( &locimagesfound, runmode->nsets*runmode->nimagestot               , MPI_INT   , 0, 666 + world_rank, MPI_COMM_WORLD ));
 			//MPI_CHECK(MPI_Send( &image_pos,      runmode->nsets*runmode->nimagestot*MAXIMPERSOURCE, MPI_points, 0, 667 + world_rank, MPI_COMM_WORLD ));
 			MPI_CHECK(MPI_Send( &image_pos,      runmode->nsets*runmode->nimagestot*MAXIMPERSOURCE*2, MPI_DOUBLE, 0, 667 + world_rank, MPI_COMM_WORLD ));
 		}
 	}
 	//
 	if (verbose)
 	{
 		int image_sum = 0;
 		//
 		for (int ipe = 0; ipe < world_size; ++ipe)
 		{
 			MPI_Status status;
 			//
 			if (ipe == 0)
 			{
 				memcpy(&numimagesfound_tmp, &locimagesfound, runmode->nsets*runmode->nimagestot*sizeof(int));
 				memcpy(&imagesposition_tmp, &image_pos,      runmode->nsets*runmode->nimagestot*MAXIMPERSOURCE*sizeof(point)); 
 			}
 			else
 			{
 				MPI_CHECK(MPI_Recv(&numimg            , 1                                                  , MPI_INT   , ipe, 666 + ipe, MPI_COMM_WORLD, &status));	
 				if (numimg != 0)
 				{
 					MPI_CHECK(MPI_Recv(&numimagesfound_tmp, runmode->nsets*runmode->nimagestot                 , MPI_INT   , ipe, 666 + ipe, MPI_COMM_WORLD, &status));
 					MPI_CHECK(MPI_Recv(&imagesposition_tmp, runmode->nsets*runmode->nimagestot*MAXIMPERSOURCE*2, MPI_DOUBLE, ipe, 667 + ipe, MPI_COMM_WORLD, &status));
 				}
 			}
 			//
 			//MPI_Reduce(&imagesfound_tmp, &total, ipe, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
 			//
 			if (numimg != 0)
 			for (int jj = 0; jj < runmode->nimagestot; ++jj)
 			{
 				for (int ii = 0; ii < runmode->nsets; ++ii)
 				{
 					//int img_len = numimagesfound[ii][jj];
 					int img_len = numimagesfound_tmp[ii][jj];
 					//printf("%d: %d %d, img_len = %d\n", ipe, ii, jj, img_len);
 					image_sum  += img_len;
 					if (img_len != 0)
 					{
 						//int loc_length = numimagesfound[ii][jj];
 						int loc_length = numimagesfound[ii][jj];
 						memcpy(&imagesposition[ii][jj][loc_length], &imagesposition_tmp[ii][jj], img_len*sizeof(point));
 						numimagesfound[ii][jj] += img_len;
 						numimg += img_len;
 
 					}
 				}
 			}
 		}
 	}
 	//MPI_Barrier(MPI_COMM_WORLD);
 	comm_time += myseconds();
 #endif
 	//
 	// image extraction to compute 
 	//
 	chi2_time = -myseconds();
 	index     =  0;
 	//
 	if (verbose)
 		for( int  source_id = 0; source_id < runmode->nsets; source_id ++)
 		{
 			unsigned short int nimages = nimages_strongLensing[source_id];
 			//
 			//______________________Initialisation______________________
 			//
 			for (unsigned short int i = 0; i < nimages; ++i)
 				for (unsigned short int j = 0; j < nimages; ++j)
 					nimagesfound[source_id][i][j] = 0;
 			//
 			for( unsigned short int image_id = 0; image_id < nimages; image_id++)
 			{
 #endif
 				//
 				double image_dist[MAXIMPERSOURCE];
 				//
 				//printf("	Image %d, number of sources found %d\n", image_id, loc_images_found);
 				//
 				struct point image_position;
 				int	     image_index;
 				//
 				for (int ii = 0; ii < /*loc*/numimagesfound[source_id][image_id]; ++ii)
 				{	
 					//
 					//
 					int image_index = 0;
 					//
 					image_position = imagesposition[source_id][image_id][ii]; 
 					image_dist[0] = mychi_dist(image_position, images[index + 0].center);  // get the distance to the real image
 					for(int i = 1; i < nimages_strongLensing[source_id]; i++)
 					{  // get the distance to each real image and keep the index of the closest real image
 
 						image_dist[i] = mychi_dist(image_position, images[index + i].center);
 						//printf("	*** image %d = %f %f, distance = %f\n", index + i, images[index + i].center.x, images[index + i].center.y, image_dist[i]);
 						if (image_dist[i] < image_dist[image_index])
 						{
 							image_index = i;
 						}
 					}
 					//
 					// we should exit loops here
 					//
 					// p1_time += myseconds();
 					//
 					int skip_image = 0;
 					// Sometimes due to the numerical errors at the centerpoint, 
 					// for SIE potentials an additional image will appear at the center of the Potential.
 					// This is due to the fact that it is not possible to simulate an infinity value 
 					// at the center correctly, Check that sis correspond to Nlens[0]
 					for (int iterator = 0; iterator < runmode->Nlens[0]; ++iterator)
 					{
 						if ( fabs(image_position.x - lens[0].position_x[iterator]) <= dx/2. and fabs(image_position.y  - lens[0].position_y[iterator]) <= dx/2.)
 						{
 							skip_image = 1;
 							printf("WARNING: You are using SIE potentials. An image to close to one of the potential centers has been classified as numerical error and removed \n");
 						}
 					}
 					//printf("%d %d %d %d\n", x_id, y_id,thread_found_image, skip_image);
 					if (!skip_image)
 					{
 						//#pragma omp atomic
 						images_found++;
 						struct point temp;
 						//printf("                        source %d, image %d, index %d, Images found: %d\n", source_id, image_id, image_index, nimagesfound[source_id][image_id][image_index]);
 						//checking whether a closest image has already been found
 						if (nimagesfound[source_id][image_id][image_index] == 0)
 						{ // if no image found up to now
 
 							//image position is allocated to theoretical image
 							//#pragma omp critical
 							tim[image_id][image_index] = image_position;  
 							//#pragma omp atomic
 							nimagesfound[source_id][image_id][image_index]++;
 						}
 						else if (nimagesfound[source_id][image_id][image_index] > 0)
 						{ // if we have already found an image
 							// If the new image we found is closer than the previous image
 							//printf("	tim2: %f %f\n", image_dist[image_index], mychi_dist(images[index + image_index].center, tim[image_id][image_index]));
 							if (image_dist[image_index] < mychi_dist(images[index + image_index].center, tim[image_id][image_index]))
 							{
 								temp = tim[image_id][image_index]; // we store the position of the old image in temp
 								//#pragma omp critical
 								tim[image_id][image_index] = image_position; // we link the observed image with the image we just found
 								//printf("tim2 %d %d = %f %f\n", image_id, image_index, image_position.x, image_position.y);
 							}
 							else
 							{
 								temp = image_position; // we store the position of the image we just found in temp
 							}
 							// initialising second_closest_id to the highest value
 							// Loop over all images in the set except the closest one
 							// and initialize to the furthest away image
 							int second_closest_id = 0;
 							for (int i = 1; i < nimages_strongLensing[source_id] && (i != image_index); i++)
 							{
 								if(image_dist[i] > image_dist[second_closest_id]) second_closest_id=i;
 							}
 							///////////////////////////////////////////////////////////////
 							// Loop over all images in the set that are not yet allocated to a theoretical image
 							// and allocate the closest one
 							// we search for an observed image not already linked (nimagesfound=0)
 							for(int i = 0; i < nimages_strongLensing[source_id] && nimagesfound[source_id][image_id][i] == 0; i++) 
 							{
 								if(image_dist[i] < image_dist[second_closest_id])
 								{
 									second_closest_id = i;
 									// im_index value changes only if we found a not linked yet image
 									image_index = i; 
 									//printf("tim3 %d %d = %f %f\n", image_id, image_index, temp.x, temp.y);
 									//#pragma omp critical
 									tim[image_id][image_index] = temp; // if we found an observed and not already linked image, we allocate the theoretical image temp
 								}
 							}
 							// increasing the total number of images found (If we find more than 1 theoretical image linked to 1 real image, 
 							// these theoretical
 							//#pragma omp atomic
 							nimagesfound[source_id][image_id][image_index]++; 
 							// images are included in this number)
 						}
 					}
 					//#pragma omp atomic
 					//loc_images_found++;
 					//thread_found_image  = 0; // for next iteration
 				}
 				//
 			}
 			//#pragma omp barrier
 			//____________________________ end of image loop
 			//
 			//____________________________ computing the local chi square
 			//
 			double chiimage;
 			//
 			int _nimages = nimages_strongLensing[source_id];
 			//
 			for( int iter = 0; iter < _nimages*_nimages; iter++)
 			{
 				int i=iter/nimages_strongLensing[source_id];
 				int j=iter%nimages_strongLensing[source_id];
 
 				//printf("nimagesfound %d %d = %d\n", i, j, nimagesfound[i][j]);
 				if(i != j)
 				{
 					// In the current method, we get the source in the source plane by ray tracing image in nimagesfound[i][i]. If we ray trace back,
 					// we arrive again at the same position and thus the chi2 from it is 0. Thus we do not calculate the chi2 (-> if i!=j)
 					if(nimagesfound[source_id][i][j] > 0)
 					{
 						double pow1 = images[index + j].center.x - tim[i][j].x;
 						double pow2 = images[index + j].center.y - tim[i][j].y;
 						//
 						//chiimage = pow(images[index + j].center.x - tim[i][j].x, 2) + pow(images[index + j].center.y - tim[i][j].y, 2);  // compute the chi2
 						chiimage = pow1*pow1 + pow2*pow2;  // compute the chi2
 						//printf("%d %d = %.15f\n", i, j, chiimage);
 						*chi    += chiimage;
 					}
 					else 
 						if(nimagesfound[source_id][i][j] == 0)
 						{
 							// If we do not find a correpsonding image, we add a big value to the chi2 to disfavor the model
 							*chi += 100.*nimages_strongLensing[source_id];
 						}
 				}
 			}
 			//
 			//____________________________ end of computing the local chi square
 			//
 			//printf("%d: chi = %.15f\n", source_id, *chi);
 			/*
 			   for (int i=0; i < nimages_strongLensing[source_id]; ++i){
 			   for (int j=0; j < nimages_strongLensing[source_id]; ++j){
 			   printf(" %d",nimagesfound[i][j]);
 			   }
 			   printf("\n");
 			   }*/
 
 			//Incrementing Index: Images already treated by previous source_id
 			index += nimages_strongLensing[source_id];
 		}
 	MPI_Barrier(MPI_COMM_WORLD);
 	//
 	chi2_time += myseconds();
 	time      += myseconds();
 	//
 	if (verbose)
 	{
 		//
 //		int nthreads = 1;
 		//
 //#pragma omp parallel
 //		nthreads = omp_get_num_threads();
 		//
 		printf("	overall time  = %f s.\n", time);
 		printf("		- image  time = %f s.\n", image_time);
 		printf("		- loop   time = %f s.\n", loop_time);
 		printf("		- comm   time = %f s.\n", comm_time);
 		printf("		- chi2   time = %f s.\n", chi2_time);
 		//
 		printf("	images found: %d out of %ld\n", images_found, images_total);
 	}
 	//
 	free(grid_gradient_x);
 	free(grid_gradient_y);
 }
 
 
 /** @brief Tranform a point from image to source plane. Result stored in sourcepoint argument
  *
  * Tranform a point from image to source plane using lensequation
  *
  * @param image_point	 image position
  * @param dlsds		 dls/ds
  * @param nhalos	 number of halos
  * @param potential_param	 gravitational potential information
  * @param source_point	 address where source information will be stored
  *
  *
  */
 void mychi_transformImageToSourcePlane(const runmode_param *runmode, const struct point *image_point, double dlsds, const struct Potential *lens, struct point *source_point)
 {   // dlsds is the distance between lens and source divided by the distance observer-source
 	struct point Grad;  // gradient
 
 	Grad = module_potentialDerivatives_totalGradient(runmode->nhalos, image_point, lens);
 	//Grad = module_potentialDerivatives_totalGradient_SOA(image_point, lens, runmode->Nlens);
 
 	source_point->x = image_point->x - dlsds*Grad.x;
 	source_point->y = image_point->y - dlsds*Grad.y;
 	//printf("dlsds %f", dlsds);
 }
 
 
 void mychi_transformImageToSourcePlane_SOA(const int Nlens, const struct point *image_point, double dlsds, const struct Potential_SOA *lens, struct point *source_point)
 {   // dlsds is the distance between lens and source divided by the distance observer-source
 	struct point Grad;  // gradient
 	Grad = module_potentialDerivatives_totalGradient_SOA(image_point, lens, Nlens);
 	//
 	source_point->x = image_point->x - dlsds*Grad.x;
 	source_point->y = image_point->y - dlsds*Grad.y;
 	//printf("dlsds %f", dlsds);
 }
 //
 //
 //
 	inline
 void mychi_transformImageToSourcePlane_SOA_Packed( const struct point *image_point, double dlsds, struct point *source_point, double *grad_x, double * grad_y, int grad_id)
 {
 
 	source_point->x = image_point->x - dlsds*grad_x[grad_id];
 	source_point->y = image_point->y - dlsds*grad_y[grad_id];
 	int world_rank;
 	//MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
 	//if (world_rank == 1) 
 	//printf("	%d: %f %f = %f %f - dlsds = %f grad id = %d grad = (%f %f)\n", world_rank, source_point->x, source_point->y, image_point->x, image_point->y, dlsds, grad_id, grad_x[grad_id], grad_y[grad_id]);
 	//printf("dlsds %f", dlsds);
 }
 
 
 /** @brief Tranform a triangle from image to source plane. Result stored in S triangle argument
  *
  * Return a triplet of points in the source plane corresponding to the triplet
  * of images. dlsds is the lens efficiency at the source redshift.
  * I is the triangle in the image plane (input), S is the same triangle in the source plane (output)
  *
  * @param I	 triangle in image plane
  * @param dlsds	 dls/ds
  * @param nhalos	 number of halos
  * @param potential_param	 gravitational potential information
  * @param S	 address where triangle source information will be stored
  *
  *
  */
 
 	inline
 void mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_upper( struct triplet *I, double dlsds, struct triplet *S, double *grad_x, double * grad_y, int grad_id, int nbgridcell)
 {
 	mychi_transformImageToSourcePlane_SOA_Packed( &I->a, dlsds,   &S->a, grad_x, grad_y, grad_id             );
 	mychi_transformImageToSourcePlane_SOA_Packed( &I->b, dlsds,   &S->b, grad_x, grad_y, grad_id + nbgridcell);
 	mychi_transformImageToSourcePlane_SOA_Packed( &I->c, dlsds,   &S->c, grad_x, grad_y, grad_id +          1);
 }
 
 	inline
 void mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_lower( struct triplet *I, double dlsds, struct triplet *S, double *grad_x, double * grad_y, int grad_id, int nbgridcell)
 {
 	mychi_transformImageToSourcePlane_SOA_Packed( &I->a, dlsds,   &S->a, grad_x, grad_y, grad_id + nbgridcell + 1);
 	mychi_transformImageToSourcePlane_SOA_Packed( &I->b, dlsds,   &S->b, grad_x, grad_y, grad_id +              1);
 	mychi_transformImageToSourcePlane_SOA_Packed( &I->c, dlsds,   &S->c, grad_x, grad_y, grad_id + nbgridcell    );
 }
 
 
 
 /** @brief Return the scalar triple product (a*b).c of the 3 vectors A[x,y,1], B[x,y,1], C[x,y,1].
  * If 2 of the 3 vectors are equal, colinear or form an orthogonal basis,
  * the triple product is 0.
  * This is also the determinant of the matrix
  *   | Ax  Bx  Cx |
  *   | Ay  By  Cy |
  *   |  1   1   1 |
  */
 //inline
 double mychi_determinant(const struct point *A,
 		const struct point *B,
 		const struct point *C)
 {
 	return( B->x*C->y - B->y*C->x +
 			A->x*B->y - A->y*B->x +
 			A->y*C->x - A->x*C->y );
 }
 
 
 /** @brief Return 1 if P is inside the triangle T, 0 otherwise.
  * Return 1 if P is inside the triangle T, 0 otherwise.
  * @param P  a point
  * @param T  a triplet of points.
  *
  *
  */
 	inline
 int mychi_inside(const struct point *P, struct triplet *T)
 {
 	double  s, s1, s2, d;
 
 	d  = mychi_determinant(&T->a, &T->b, &T->c);
 	s  = mychi_determinant(&T->a, &T->b, P)*d;
 	if (s < 0.) return 0;
 	s1 = mychi_determinant(&T->b, &T->c, P)*d;
 	if (s1 < 0.) return 0;
 	s2 = mychi_determinant(&T->c, &T->a, P)*d;
 	if (s2 < 0.) return 0;
 	return 1;
 
 	return((s > 0.) && (s1 > 0.) && (s2 > 0.));  // If all determinants are positive,
 	// the point must be inside the triangle
 }
 
 
 /*
    int
    mychi_inside2(const struct point *A, const struct point *B, const struct point *C)
    {
 
 // Compute vectors        
 v0 = C - A;
 v1 = B - A;
 v2 = P - A;
 
 // Compute dot products
 dot00 = dot(v0, v0);
 dot01 = dot(v0, v1);
 dot02 = dot(v0, v2);
 dot11 = dot(v1, v1);
 dot12 = dot(v1, v2);
 
 // Compute barycentric coordinates
 invDenom = 1 / (dot00 * dot11 - dot01 * dot01);
 u = (dot11 * dot02 - dot01 * dot12) * invDenom;
 v = (dot00 * dot12 - dot01 * dot02) * invDenom;
 
 // Check if point is in triangle
 return (u >= 0) && (v >= 0) && (u + v < 1);
 }
  */
 
 /** @brief Return 1 if P is inside the triangle T or on its border, 0 otherwise.
  *
  * Return 1 if P is inside the triangle T or on its border, 0 otherwise.
  * @param  P  a point
  * @param  T  a triplet of points.
  *
  *
  */
 	inline
 int mychi_insideborder(const struct point *P, struct triplet *T)
 {
 	double  s, s1, s2, d;
 
 	d  = mychi_determinant(&T->a, &T->b, &T->c);
 	s  = mychi_determinant(&T->a, &T->b, P)*d;
 	if (s < 0.) return 0;
 	s1 = mychi_determinant(&T->b, &T->c, P)*d;
 	if (s1 < 0.) return 0;
 	s2 = mychi_determinant(&T->c, &T->a, P)*d;
 	if (s2 < 0.) return 0;
 	return 1;
 	return((s >= 0.) && (s1 >= 0.) && (s2 >= 0.));  // If all determinants are positive or 0,
 	// the point must be inside the triangle or on its border
 
 }
 
 /** @brief Barycentre of a triplet/triangle
  *
  * A is a structure triplet that contains 3 structures point a,b and c
  * Return value B is a point
  *
  *
  */
 struct  point   mychi_barycenter(struct triplet *A)
 {
 	struct  point   B;
 
 	B.x = (A->a.x + A->b.x + A->c.x) / 3.;
 	B.y = (A->a.y + A->b.y + A->c.y) / 3.;
 	return(B);
 }
 
 /** @brief Euclidean distance between 2 points
  *
  * Euclidean distance between 2 points
  *
  */
 double mychi_dist(struct point A, struct point B)
 {
 	double  x, y;
 	x = A.x - B.x;
 	y = A.y - B.y;
 	return(sqrt(x*x + y*y));
 }
 
diff --git a/Benchmarks/ChiBenchmark/main.cpp b/Benchmarks/ChiBenchmark/main.cpp
index e417141..ec386ff 100644
--- a/Benchmarks/ChiBenchmark/main.cpp
+++ b/Benchmarks/ChiBenchmark/main.cpp
@@ -1,402 +1,395 @@
 /**
  * @file   main.cpp
  * @Author Christoph Schaaefer, EPFL (christophernstrerne.schaefer@epfl.ch)
  * @date   October 2016
  * @brief  Benchmark for gradhalo function
  */
 
 #include <iostream>
 #include <iomanip>
 #include <string.h>
 #include <math.h>
 #include <sys/time.h>
 #include <fstream>
 #include <sys/stat.h>
 #include <unistd.h>
 //
 #include <mm_malloc.h>
 //
 #include <structure_hpc.hpp>
 #include "timer.h"
 #include "gradient.hpp"
 #include "chi_CPU.hpp"
 #include "module_cosmodistances.hpp"
 #include "module_readParameters.hpp"
 
 
 #ifdef __WITH_MPI
 #include<mpi.h>
 #endif
 
 #ifdef _OPENMP
 #include<omp.h>
 #endif
 
 
 //#define __WITH_LENSTOOL 0
 #ifdef __WITH_LENSTOOL
 #warning "linking with libtool..."
 #include <fonction.h>
 #include <constant.h>
 #include <dimension.h>
 #include <structure.h>
 #include <setup.hpp>
 #endif
 
 #ifdef __WITH_LENSTOOL
 struct g_mode   M;
 struct g_pot    P[NPOTFILE];
 struct g_pixel  imFrame, wFrame, ps, PSF;
 struct g_cube   cubeFrame;
 struct g_dyn    Dy;      //   //TV
 
 
 struct g_source S;
 struct g_image  I;
 struct g_grille G;
 struct g_msgrid H;  // multi-scale grid
 struct g_frame  F;
 struct g_large  L;
 struct g_cosmo  C;
 struct g_cline  CL;
 struct g_observ O;
 struct pot      lens[NLMAX];
 struct pot      lmin[NLMAX], lmax[NLMAX], prec[NLMAX];
 struct g_cosmo  clmin, clmax;       /*cosmological limits*/
 struct galaxie  smin[NFMAX], smax[NFMAX];       // limits on source parameters
 struct ipot     ip;
 struct MCarlo   mc;
 struct vfield   vf;
 struct vfield   vfmin,vfmax; // limits on velocity field parameters
 struct cline    cl[NIMAX];
 lensdata *lens_table;
 
 int  block[NLMAX][NPAMAX];      /*switch for the lens optimisation*/
 int  cblock[NPAMAX];                /*switch for the cosmological optimisation*/
 int  sblock[NFMAX][NPAMAX];                /*switch for the source parameters*/
 int  vfblock[NPAMAX];                /*switch for the velocity field parameters*/
 double excu[NLMAX][NPAMAX];
 double excd[NLMAX][NPAMAX];
 /* supplments tableaux de valeurs pour fonctions g pour Einasto
  *  * Ce sont trois variables globales qu'on pourra utiliser dans toutes les fonctions du projet
  *  */
 
 #define CMAX 20
 #define LMAX 80
 
 float Tab1[LMAX][CMAX];
 float Tab2[LMAX][CMAX];
 float Tab3[LMAX][CMAX];
 
 
 int      nrline, ntline, flagr, flagt;
 long int  narclet;
 
 struct point    gimage[NGGMAX][NGGMAX], gsource_global[NGGMAX][NGGMAX];
 struct biline   radial[NMAX], tangent[NMAX];
 struct galaxie  arclet[NAMAX], source[NFMAX], image[NFMAX][NIMAX];
 struct galaxie  cimage[NFMAX];
 struct pointgal     gianti[NPMAX][NIMAX];
 
 struct point    SC;
 double elix;
 double alpha_e;
 
 double *v_xx;
 double *v_yy;
 double **map_p;
 double **tmp_p;
 double **map_axx;
 double **map_ayy;
 #endif
 
 
 void chi_bruteforce_SOA_CPU_grid_gradient(double *chi, int *error, runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, galaxy *images);
 
 
 int module_readCheckInput_readInput(int argc, char *argv[])
 {
 /// check if there is a correct number of arguments, and store the name of the input file in infile
 
 	char* infile;
 	struct stat  file_stat;
 	// If we do not have 3 arguments, stop
 	if ( argc != 3 )
 	{
 		fprintf(stderr, "\nUnexpected number of arguments\n");
 		fprintf(stderr, "\nUSAGE:\n");
 		fprintf(stderr, "lenstool  input_file  output_directorypath  [-n]\n\n");
 		exit(-1);
 	}
 	else if ( argc == 3 )
 		infile=argv[1];
 	std::ifstream ifile(infile,std::ifstream::in); // Open the file
 
 
 	int ts = (int) time (NULL);
 	char buffer[10];
 	std::stringstream ss;
 	ss << ts;
 	std::string trimstamp = ss.str();
 	//
 	std::string outdir = argv[2];
 	outdir += "-";
 	outdir += trimstamp;
 	std::cout << "output directory: " << outdir << std::endl;
 
 	// check whether the output directory already exists
 	if (stat(outdir.c_str(), &file_stat) < 0){
 		mkdir(outdir.c_str(), S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH );
 	}
 	else {
 		printf("Error : Directory %s already exists. Specify a non existing directory.\n",argv[2]);
 		exit(-1);
 	}
 
 	// check whether the input file exists. If it could not be opened (ifile = 0), it does not exist
 	if(ifile){
 		ifile.close();
 	}
 	else{
 		printf("The file %s does not exist, please specify a valid file name\n",infile);
 		exit(-1);
 	}
 	return 0;
 }
 
 int main(int argc, char *argv[])
 {
 	int world_size = 1;
 	int world_rank = 0;
 #ifdef __WITH_MPI	
 	MPI_Init(NULL, NULL);
 	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
 	MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
 	char processor_name[MPI_MAX_PROCESSOR_NAME];
 	int name_len;
 	MPI_Get_processor_name(processor_name, &name_len);
 	MPI_Barrier(MPI_COMM_WORLD);
 
 	// Print off a hello world message
 #endif
 	int numthreads = 1;
 #ifdef _OPENMP
 #warning "using openmp"
 #pragma omp parallel
 	numthreads = omp_get_num_threads();
 #endif
 	//
 	printf("Hello world from processor %s, rank %d out of %d processors and %d threads per rank\n", processor_name, world_rank, world_size, numthreads); fflush(stdout);
 #ifdef __WITH_MPI
 	MPI_Barrier(MPI_COMM_WORLD);
 #endif
 	int verbose = (world_rank == 0);
 	//
 	if (verbose) printf("Lenstool-HPC\n\n");
 	//
 	double wallclock = myseconds();
 	if (world_rank == 0) printf("Reading parameter file at time %f s...\n", myseconds() - wallclock);
 	// Setting Up the problem
 	//===========================================================================================================
 
 	// This module function reads the terminal input when calling LENSTOOL and checks that it is correct
 	// Otherwise it exits LENSTOOL
 	if (world_rank == 0) module_readCheckInput_readInput(argc, argv);
 
 	// This module function reads the cosmology parameters from the parameter file
 	// Input: struct cosmologicalparameters cosmology, parameter file
 	// Output: Initialized cosmology struct
 	cosmo_param cosmology;  // Cosmology struct to store the cosmology data from the file
 	std::string inputFile = argv[1];   // Input file
 	module_readParameters_readCosmology(inputFile, cosmology);
 
 	// This module function reads the runmode paragraph and the number of sources, arclets, etc. in the parameter file.
 	// The runmode_param stores the information of what exactly the user wants to do with lenstool.
 	struct runmode_param runmode;
 	module_readParameters_readRunmode(inputFile, &runmode);
 
 	if (world_rank == 0)
 	{
 		module_readParameters_debug_cosmology(runmode.debug, cosmology);
 		module_readParameters_debug_runmode(runmode.debug, runmode);
 	}
 
 	//=== Declaring variables
 	struct grid_param frame;
 	struct galaxy images[runmode.nimagestot];
 	struct galaxy sources[runmode.nsets];
 	struct Potential lenses[runmode.nhalos+runmode.npotfile-1];
 	struct Potential_SOA lenses_SOA_table[NTYPES];
 	struct Potential_SOA lenses_SOA;
 	struct cline_param cline;
 	struct potfile_param potfile;
 	struct Potential potfilepotentials[runmode.npotfile];
 	struct potentialoptimization host_potentialoptimization[runmode.nhalos];
 	int nImagesSet[runmode.nsets]; // Contains the number of images in each set of imagesnano
 
 	// This module function reads in the potential form and its parameters (e.g. NFW)
 	// Input: input file
 	// Output: Potentials and its parameters
 
 
 	module_readParameters_Potential(inputFile, lenses, runmode.nhalos);
 	//Converts to SOA
 	//module_readParameters_PotentialSOA(inputFile, lenses, lenses_SOA, runmode.Nlens);
 	module_readParameters_PotentialSOA(inputFile, lenses, &lenses_SOA, runmode.nhalos);
 	//module_readParameters_PotentialSOA_nonsorted(inputFile, lenses, &lenses_SOA_nonsorted, runmode.nhalos);
 	//@@module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos);
 	//std::cerr << lenses_SOA[1].b0[0] << " " << lenses[0].b0  << std::endl;
 	// This module function reads in the potfiles parameters
 	// Input: input file
 	// Output: Potentials from potfiles and its parameters
 
 	if (runmode.potfile == 1 ){
 		module_readParameters_readpotfiles_param(inputFile, &potfile,cosmology);
 		module_readParameters_debug_potfileparam(runmode.debug, &potfile);
 		module_readParameters_readpotfiles(&runmode,&potfile,lenses);
 		module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos+runmode.npotfile);
 
 	}
 
 
 
 	// This module function reads in the grid form and its parameters
 	// Input: input file
 	// Output: grid and its parameters
 
-#if 1
 	module_readParameters_Grid(inputFile, &frame);
 
 	if (runmode.image == 1 or runmode.inverse == 1 or runmode.time > 0){
 
 		// This module function reads in the strong lensing images
 		module_readParameters_readImages(&runmode, images, nImagesSet);
 		//runmode.nsets = runmode.nimagestot;
 		for(int i = 0; i < runmode.nimagestot; ++i){
 
 			images[i].dls = module_cosmodistances_objectObject(lenses[0].z, images[i].redshift, cosmology);
 			images[i].dos = module_cosmodistances_observerObject(images[i].redshift, cosmology);
 			images[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, images[i].redshift, cosmology);
 
 		}
 		//module_readParameters_debug_image(runmode.debug, images, nImagesSet,runmode.nsets);
 
 	}
 
 	if (runmode.inverse == 1){
 
 		// This module function reads in the potential optimisation limits
 		module_readParameters_limit(inputFile,host_potentialoptimization,runmode.nhalos);
 		module_readParameters_debug_limit(runmode.debug, host_potentialoptimization[0]);
 	}
 
 
 	if (runmode.source == 1)
 	{
 		//Initialisation to default values.(Setting sources to z = 1.5 default value)
 		for(int i = 0; i < runmode.nsets; ++i)
 		{
 			sources[i].redshift = 1.5;
 		}
 		// This module function reads in the strong lensing sources
 		module_readParameters_readSources(&runmode, sources);
 		//Calculating cosmoratios
 		for(int i = 0; i < runmode.nsets; ++i)
 		{
 			sources[i].dls = module_cosmodistances_objectObject(lenses[0].z, sources[i].redshift, cosmology);
 			sources[i].dos = module_cosmodistances_observerObject(sources[i].redshift, cosmology);
 			sources[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, sources[i].redshift, cosmology);
 		}
 		module_readParameters_debug_source(runmode.debug, sources, runmode.nsets);
 	}
 #ifdef __WITH_MPI
 	MPI_Barrier(MPI_COMM_WORLD);
 #endif
 	//
-<<<<<<< HEAD
-	std::cout << "--------------------------" << std::endl << std::endl;
-#endif
-
-=======
 	if (verbose) std::cout << "--------------------------" << std::endl << std::endl;
 	//
->>>>>>> lenstool_mpi
 	// Lenstool Bruteforce
 	//===========================================================================================================
 	if (verbose)
 	{
 #ifdef __WITH_LENSTOOL
 		{
 			printf("Calling lenstool at time %f s\n", myseconds() - wallclock);
 			setup_lenstool();
 			//
 			double chi2;
 			double lhood0(0);
 			int error(0);
 			double time;
 
 			if ( M.ichi2 != 0 )
 			{
 				int error;
 				//NPRINTF(stdout, "INFO: compute chires.dat\n");
 				readConstraints();
 				o_chires("chires.dat");
 				time = -myseconds();
 				error = o_chi_lhood0(&chi2, &lhood0, NULL);
 				time += myseconds();
 				printf("INFO: chi2 %lf  Lhood %lf\n", chi2, -0.5 * ( chi2 + lhood0 )  );
 				o_global_free();
 			}
 
 			std::cout << "Lenstool 6.8.1 chi Benchmark: ";
 			std::cout << " Chi : " << std::setprecision(15) << chi2 ;
 			std::cout << " Time  " << std::setprecision(15) << time << std::endl;
 			o_global_free();
 		}
 #endif
 	}
 
 	// Lenstool-GPU Bruteforce
 	//===========================================================================================================
 #if 0
 	{
 		printf("Calling lenstoolhpc orig at time %f s\n", myseconds() - wallclock);
 		std::cout << "LenstoolHPC dist chi Benchmark:\n ";
 		double chi2;
 		double time;
 		int error;
 		time = -myseconds();
 		mychi_bruteforce_SOA_CPU_grid_gradient_orig(&chi2, &error, &runmode, &lenses_SOA, &frame, nImagesSet, images);
 		time += myseconds();
 
 		std::cout << " Chi : " << std::setprecision(15) << chi2;
 		std::cout << " Time  " << std::setprecision(15) << time << std::endl;
 	}
 #endif
 
 
 
 #if 0
 	{
 		//std::cout << "MylenstoolHPC chi Benchmark:\n "; 
 		if (verbose) printf("Calling lenstoolhpc at time %f s\n", myseconds() - wallclock);
 		double chi2;
 		double time;
 		int error;
 		time = -myseconds();
 		mychi_bruteforce_SOA_CPU_grid_gradient(&chi2, &error, &runmode, &lenses_SOA, &frame, nImagesSet, images);
 		time += myseconds();
 		if (verbose)
 		{
 			std::cout << " Chi : " << std::setprecision(15) << chi2;
 			std::cout << " Time  " << std::setprecision(15) << time << std::endl;
 		}
 	}
 #endif
 
 	if (verbose) printf("Ending execution at time %f s\n", myseconds() - wallclock);
 #ifdef __WITH_MPI
 	MPI_Finalize();
 #endif
 
 }