diff --git a/Benchmarks/ChiBenchmark/chi_CPU.cpp b/Benchmarks/ChiBenchmark/chi_CPU.cpp index a06e5f2..b7f660b 100644 --- a/Benchmarks/ChiBenchmark/chi_CPU.cpp +++ b/Benchmarks/ChiBenchmark/chi_CPU.cpp @@ -1,827 +1,828 @@ #include <iostream> #include <string.h> //#include <cuda_runtime.h> #include <math.h> #include <sys/time.h> #include <fstream> #include <unistd.h> #include <assert.h> #ifndef __xlC__ #warning "gnu compilers" #include <immintrin.h> #endif //#include "simd_math.h" #include "chi_CPU.hpp" //#ifdef __WITH_GPU #include "grid_gradient_GPU.cuh" ////#endif //#include "gradient_GPU.cuh" #ifdef _OPENMP #include <omp.h> #endif #ifdef __WITH_MPI #include<mpi.h> #include"mpi_check.h" #endif #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) double myseconds() { struct timeval tp; struct timezone tzp; int i; i = gettimeofday(&tp,&tzp); return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); } void mychi_bruteforce_SOA_CPU_grid_gradient(double *chi, int *error, runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, galaxy *images) { // //double dx, dy; //, x_pos, y_pos; //pixelsize // // double im_dist[MAXIMPERSOURCE]; // distance to a real image for an "theoretical" image found from a theoretical source // int im_index; // index of the closest real image for an image found from a theoretical source // int second_closest_id; // index of the second closest real image for an image found from a theoretical source // int thread_found_image = 0; // at each iteration in the lens plane, turn to 1 whether the thread find an image // struct point im_position, temp; // position of the image found from a theoretical source + temp variable for comparison // struct triplet Tsup, Tinf, Tsupsource, Tinfsource;// triangles for the computation of the images created by the theoretical sources struct galaxy sources[runmode->nsets][runmode->nimagestot]; // theoretical sources (common for a set) int nimagesfound [runmode->nsets][runmode->nimagestot][MAXIMPERSOURCE]; // number of images found from the theoretical sources int locimagesfound[runmode->nsets][runmode->nimagestot]; struct point image_pos [runmode->nsets][runmode->nimagestot][MAXIMPERSOURCE]; //double image_dist [runmode->nsets][runmode->nimagestot][MAXIMPERSOURCE]; struct point tim [runmode->nimagestot][MAXIMPERSOURCE]; // theoretical images (computed from sources) // int world_size = 1; int world_rank = 0; #ifdef __WITH_MPI MPI_Comm_size(MPI_COMM_WORLD, &world_size); MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); MPI_Barrier(MPI_COMM_WORLD); #endif unsigned int verbose = (world_rank == 0); // int grid_size = runmode->nbgridcells; int loc_grid_size = runmode->nbgridcells/world_size; // double y_len = fabs(frame->ymax - frame->ymin); int y_len_loc = runmode->nbgridcells/world_size; int y_pos_loc = (int) world_rank*y_len_loc; int y_bound = y_len_loc; if ((world_rank + 1) != world_size) y_bound++; // // const double dx = (frame->xmax - frame->xmin)/(runmode->nbgridcells - 1); const double dy = (frame->ymax - frame->ymin)/(runmode->nbgridcells - 1); // double *grid_gradient_x, *grid_gradient_y; // //grid_gradient_x = (double *)malloc((int) grid_size*loc_grid_size*sizeof(double)); //grid_gradient_y = (double *)malloc((int) grid_size*loc_grid_size*sizeof(double)); grid_gradient_x = (double *)malloc((int) grid_size*y_bound*sizeof(double)); grid_gradient_y = (double *)malloc((int) grid_size*y_bound*sizeof(double)); // //uais //if (verbose) printf("@@%d: nsets = %d nimagestot = %d maximgpersource = %d, grid_size = %d, loc_grid_size = %d, y_pos_loc = %d\n", world_rank, runmode->nsets, runmode->nimagestot, MAXIMPERSOURCE, grid_size, loc_grid_size, y_pos_loc); // const int grid_dim = runmode->nbgridcells; //Packaging the image to sourceplane conversion double time = -myseconds(); //gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_dim); #ifdef __WITH_GPU gradient_grid_GPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, 0, y_pos_loc); //gradient_grid_GPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_size); #else - gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, 0, y_pos_loc); + int zero = 0; + gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, zero, y_pos_loc); #endif #if 0 if (world_rank == 0) for (int jj = 0; jj < loc_grid_size; ++jj) for (int ii = 0; ii < runmode->nbgridcells; ++ii) printf("%d %d: %f %f\n", ii, jj, grid_gradient_x[jj*grid_size + ii], grid_gradient_y[jj*grid_size + ii]); #endif //gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_size, loc_grid_size); #ifdef __WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif time += myseconds(); if (verbose) printf(" Gridgrad time = %f s.\n", time); // int index = 0; // index tracks the image within the total image array in the image plane *chi = 0; // double chi2_time = 0.; double loop_time = 0.; double image_time = 0.; // int images_found = 0; long int images_total = 0; // //printf("@@nsets = %d nx = %d ny = %d, xmin = %f, dx = %f, ymin = %f, dy = %f\n", runmode->nsets, runmode->nbgridcells, runmode->nbgridcells, frame->xmin, dx, frame->ymin, dy ); // int numsets = 0; // for( int source_id = 0; source_id < runmode->nsets; source_id ++) numsets += nimages_strongLensing[source_id]; //printf("@@Total numsets = %d\n", numsets); // time = -myseconds(); // // nsets : number of images in the source plane // nimagestot: number of images in the image plane // image_time -= myseconds(); // unsigned int nsets = runmode->nsets; for( int source_id = 0; source_id < runmode->nsets; source_id ++) { // number of images in the image plane for the specific image (1,3,5...) unsigned short int nimages = nimages_strongLensing[source_id]; //@@printf("@@ source_id = %d, nimages = %d\n", source_id, nimages_strongLensing[source_id]); //____________________________ image (constrains) loop ________________________________ for(unsigned short int image_id = 0; image_id < nimages; image_id++) { //printf("@@ nimages = %d\n", nimages_strongLensing[source_id]); //________ computation of theoretical sources _________ // output: sources[source_id].center //printf("Image = %f %f\n", images[index + image_id].center.x, images[index + image_id].center.y); mychi_transformImageToSourcePlane_SOA(runmode->nhalos, &images[index + image_id].center, images[index + image_id].dr, lens, &sources[source_id][image_id].center); // // void mychi_transformImageToSourcePlane_SOA(const int Nlens, const struct point *image_point, double dlsds, const struct Potential_SOA *lens, struct point *source_point) // struct point Grad = module_potentialDerivatives_totalGradient_SOA(&images[index + image_id].center, lens, runmode->nhalos); //printf(" image %d, %d (%d) = (%.15f, %.15f) -> (%.15f, %.15f)\n", source_id, image_id, nimages, images[index + image_id].center.x, images[index + image_id].center.y, sources[source_id].center.x, sources[source_id].center.y ); //printf("Grad.x = %f, %f\n", Grad.x, grid_gradient_x[images[index + image_id].center.x/dx]); // // find the position of the constrain in the source plane sources[source_id][image_id].center.x = images[index + image_id].center.x - images[index + image_id].dr*Grad.x; sources[source_id][image_id].center.y = images[index + image_id].center.y - images[index + image_id].dr*Grad.y; // //time += myseconds(); // sources[source_id][image_id].redshift = images[index + image_id].redshift; // sources[source_id][image_id].dr = images[index + image_id].dr; sources[source_id][image_id].dls = images[index + image_id].dls; sources[source_id][image_id].dos = images[index + image_id].dos; #if 1 } index += nimages; } #ifdef __WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif image_time += myseconds(); // // main loop // //double y_len = fabs(frame->ymax - frame->ymin); //int y_len_loc = runmode->nbgridcells/world_size; //int y_pos_loc = (int) world_rank*y_len_loc; //printf("%d out of %d: y_id = %d to %d\n", world_rank, world_size, y_pos_loc, y_pos_loc + y_len_loc - 1); //fflush(stdout); //MPI_Barrier(MPI_COMM_WORLD); // loop_time -= myseconds(); index = 0; int numimg = 0; // for( int source_id = 0; source_id < runmode->nsets; source_id ++) { // number of images in the image plane for the specific image (1,3,5...) unsigned short int nimages = nimages_strongLensing[source_id]; //printf("@@ source_id = %d, nimages = %d\n", source_id, nimages_strongLensing[source_id]); //____________________________ image (constrains) loop ________________________________ for(unsigned short int image_id = 0; image_id < nimages; image_id++) { #endif // //struct point image_pos [MAXIMPERSOURCE]; // //MPI_Barrier(MPI_COMM_WORLD); //if (verbose) printf("source = %d, image = %d\n", source_id, image_id); //if (verbose) fflush(stdout); int loc_images_found = 0; //#ifdef __WITH_MPI // MPI_Barrier(MPI_COMM_WORLD); //#endif #pragma omp parallel #pragma omp for reduction(+: images_total) //for (int y_id = 0; y_id < (runmode->nbgridcells - 1); ++y_id ) //for (int y_id = 0; y_id < (y_len_loc - 1); ++y_id) for (int y_id = 0; y_id < (y_bound - 1); ++y_id) //for (int y_id = world_rank*y_pos_loc; y_id < (world_rank*y_pos_loc + y_len_loc - 1); ++y_id) { //for (int y_id = 0; (y_id < runmode->nbgridcells - 1) /*&& (loc_images_found != nimages)*/; ++y_id ) for (int x_id = 0; x_id < runmode->nbgridcells - 1 ; ++x_id) { //int yy_pos = MIN(y_pos_loc + y_id, runmode->nbgridcells - 1); images_total++; // double x_pos = frame->xmin + ( x_id)*dx; double y_pos = frame->ymin + (y_pos_loc + y_id)*dy; //printf("%d: x_id = %d xpos = %f, y_id = %d ypos = %f\n", world_rank, x_id, x_pos, y_pos_loc + y_id, y_pos); // // Define the upper + lower triangle, both together = square = pixel // struct triplet Tsup, Tinf; // Tsup.a.x = x_pos; Tsup.b.x = x_pos; Tsup.c.x = x_pos + dx; Tinf.a.x = x_pos + dx; Tinf.b.x = x_pos + dx; Tinf.c.x = x_pos; // Tsup.a.y = y_pos; Tsup.b.y = y_pos + dy; Tsup.c.y = y_pos; Tinf.a.y = y_pos + dy; Tinf.b.y = y_pos; Tinf.c.y = y_pos + dy; // // Lens to Sourceplane conversion of triangles // // double time = -myseconds(); // struct triplet Timage; struct triplet Tsource; // //printf(" - Tsup = %f %f\n", Tsup.a.x, Tsup.a.y); mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_upper(&Tsup, sources[source_id][image_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, y_id*grid_dim + x_id, grid_dim); //mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_upper(&Tsup, sources[source_id][image_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, (y_pos_loc + y_id)*grid_dim + x_id, grid_dim); //@@if (world_rank == 1) // int thread_found_image = 0; // if (mychi_insideborder(&sources[source_id][image_id].center, &Tsource) == 1) { //printf(" -> %d found: y_id = %d, x_id = %d : pixel %f %f -> %f %f\n", world_rank, y_pos_loc + y_id, x_id, Tsup.a.x, Tsup.a.y, Tsource.a.x, Tsource.a.y); thread_found_image = 1; Timage = Tsup; } else { //printf(" - Tinf = %f %f\n", Tinf.a.x, Tinf.a.y); mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_lower(&Tinf, sources[source_id][image_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, y_id*grid_dim + x_id, grid_dim); //mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_lower(&Tinf, sources[source_id][image_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, (y_id + y_pos_loc)*grid_dim + x_id, grid_dim); //@@if (world_rank == 1) if (mychi_inside(&sources[source_id][image_id].center, &Tsource) == 1) { //printf(" -> %d found: y_id = %d, x_id = %d : pixel %f %f -> %f %f\n", world_rank, y_pos_loc + y_id, x_id, Tinf.a.x, Tinf.a.y, Tsource.a.x, Tsource.a.y); thread_found_image = 1; Timage = Tinf; } } #if 1 if (thread_found_image) { #pragma omp critical { image_pos[source_id][image_id][loc_images_found] = mychi_barycenter(&Timage); // get the barycenter of the triangle locimagesfound[source_id][image_id]++; loc_images_found++; numimg ++; } } #endif } } #if 1 } index += nimages_strongLensing[source_id]; } #ifdef __WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif loop_time += myseconds(); // double comm_time = -myseconds(); // int numimagesfound [runmode->nsets][runmode->nimagestot]; memset(&numimagesfound, 0, runmode->nsets*runmode->nimagestot*sizeof(int)); struct point imagesposition [runmode->nsets][runmode->nimagestot][MAXIMPERSOURCE]; memset(&imagesposition, 0, runmode->nsets*runmode->nimagestot*MAXIMPERSOURCE*sizeof(point)); // int numimagesfound_tmp[runmode->nsets][runmode->nimagestot]; struct point imagesposition_tmp[runmode->nsets][runmode->nimagestot][MAXIMPERSOURCE]; // memset(numimagesfound_tmp, 0, runmode->nsets*runmode->nimagestot*sizeof(int)); memset(imagesposition_tmp, 0, runmode->nsets*runmode->nimagestot*sizeof(int)); /* //if (verbose) { int image_sum = 0; for (int ii = 0; ii < runmode->nsets; ++ii) for (int jj = 0; jj < runmode->nimagestot; ++jj) { image_sum += locimagesfound[ii][jj]; } printf("%d: num images found = %d\n", world_rank, image_sum); } MPI_Barrier(MPI_COMM_WORLD); */ #ifdef __WITH_MPI int total = 0; //MPI_Reduce(&locimagesfound, &imagesfound, runmode->nsets*runmode->nimagestot, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); //MPI_Reduce(&locimagesfound, &imagesfound, runmode->nsets*runmode->nimagestot, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); // if (!verbose) { MPI_CHECK(MPI_Send( &numimg , 1 , MPI_INT , 0, 666 + world_rank, MPI_COMM_WORLD )); if (numimg != 0) { MPI_CHECK(MPI_Send( &locimagesfound, runmode->nsets*runmode->nimagestot , MPI_INT , 0, 666 + world_rank, MPI_COMM_WORLD )); //MPI_CHECK(MPI_Send( &image_pos, runmode->nsets*runmode->nimagestot*MAXIMPERSOURCE, MPI_points, 0, 667 + world_rank, MPI_COMM_WORLD )); MPI_CHECK(MPI_Send( &image_pos, runmode->nsets*runmode->nimagestot*MAXIMPERSOURCE*2, MPI_DOUBLE, 0, 667 + world_rank, MPI_COMM_WORLD )); } } // if (verbose) { int image_sum = 0; // for (int ipe = 0; ipe < world_size; ++ipe) { MPI_Status status; // if (ipe == 0) { memcpy(&numimagesfound_tmp, &locimagesfound, runmode->nsets*runmode->nimagestot*sizeof(int)); memcpy(&imagesposition_tmp, &image_pos, runmode->nsets*runmode->nimagestot*MAXIMPERSOURCE*sizeof(point)); } else { MPI_CHECK(MPI_Recv(&numimg , 1 , MPI_INT , ipe, 666 + ipe, MPI_COMM_WORLD, &status)); if (numimg != 0) { MPI_CHECK(MPI_Recv(&numimagesfound_tmp, runmode->nsets*runmode->nimagestot , MPI_INT , ipe, 666 + ipe, MPI_COMM_WORLD, &status)); MPI_CHECK(MPI_Recv(&imagesposition_tmp, runmode->nsets*runmode->nimagestot*MAXIMPERSOURCE*2, MPI_DOUBLE, ipe, 667 + ipe, MPI_COMM_WORLD, &status)); } } // //MPI_Reduce(&imagesfound_tmp, &total, ipe, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); // if (numimg != 0) for (int jj = 0; jj < runmode->nimagestot; ++jj) { for (int ii = 0; ii < runmode->nsets; ++ii) { //int img_len = numimagesfound[ii][jj]; int img_len = numimagesfound_tmp[ii][jj]; //printf("%d: %d %d, img_len = %d\n", ipe, ii, jj, img_len); image_sum += img_len; if (img_len != 0) { //int loc_length = numimagesfound[ii][jj]; int loc_length = numimagesfound[ii][jj]; memcpy(&imagesposition[ii][jj][loc_length], &imagesposition_tmp[ii][jj], img_len*sizeof(point)); numimagesfound[ii][jj] += img_len; numimg += img_len; } } } } } //MPI_Barrier(MPI_COMM_WORLD); comm_time += myseconds(); #endif // // image extraction to compute // chi2_time = -myseconds(); index = 0; // if (verbose) for( int source_id = 0; source_id < runmode->nsets; source_id ++) { unsigned short int nimages = nimages_strongLensing[source_id]; // //______________________Initialisation______________________ // for (unsigned short int i = 0; i < nimages; ++i) for (unsigned short int j = 0; j < nimages; ++j) nimagesfound[source_id][i][j] = 0; // for( unsigned short int image_id = 0; image_id < nimages; image_id++) { #endif // double image_dist[MAXIMPERSOURCE]; // //printf(" Image %d, number of sources found %d\n", image_id, loc_images_found); // struct point image_position; int image_index; // for (int ii = 0; ii < /*loc*/numimagesfound[source_id][image_id]; ++ii) { // // int image_index = 0; // image_position = imagesposition[source_id][image_id][ii]; image_dist[0] = mychi_dist(image_position, images[index + 0].center); // get the distance to the real image for(int i = 1; i < nimages_strongLensing[source_id]; i++) { // get the distance to each real image and keep the index of the closest real image image_dist[i] = mychi_dist(image_position, images[index + i].center); //printf(" *** image %d = %f %f, distance = %f\n", index + i, images[index + i].center.x, images[index + i].center.y, image_dist[i]); if (image_dist[i] < image_dist[image_index]) { image_index = i; } } // // we should exit loops here // // p1_time += myseconds(); // int skip_image = 0; // Sometimes due to the numerical errors at the centerpoint, // for SIE potentials an additional image will appear at the center of the Potential. // This is due to the fact that it is not possible to simulate an infinity value // at the center correctly, Check that sis correspond to Nlens[0] for (int iterator = 0; iterator < runmode->Nlens[0]; ++iterator) { if ( fabs(image_position.x - lens[0].position_x[iterator]) <= dx/2. and fabs(image_position.y - lens[0].position_y[iterator]) <= dx/2.) { skip_image = 1; printf("WARNING: You are using SIE potentials. An image to close to one of the potential centers has been classified as numerical error and removed \n"); } } //printf("%d %d %d %d\n", x_id, y_id,thread_found_image, skip_image); if (!skip_image) { //#pragma omp atomic images_found++; struct point temp; //printf(" source %d, image %d, index %d, Images found: %d\n", source_id, image_id, image_index, nimagesfound[source_id][image_id][image_index]); //checking whether a closest image has already been found if (nimagesfound[source_id][image_id][image_index] == 0) { // if no image found up to now //image position is allocated to theoretical image //#pragma omp critical tim[image_id][image_index] = image_position; //#pragma omp atomic nimagesfound[source_id][image_id][image_index]++; } else if (nimagesfound[source_id][image_id][image_index] > 0) { // if we have already found an image // If the new image we found is closer than the previous image //printf(" tim2: %f %f\n", image_dist[image_index], mychi_dist(images[index + image_index].center, tim[image_id][image_index])); if (image_dist[image_index] < mychi_dist(images[index + image_index].center, tim[image_id][image_index])) { temp = tim[image_id][image_index]; // we store the position of the old image in temp //#pragma omp critical tim[image_id][image_index] = image_position; // we link the observed image with the image we just found //printf("tim2 %d %d = %f %f\n", image_id, image_index, image_position.x, image_position.y); } else { temp = image_position; // we store the position of the image we just found in temp } // initialising second_closest_id to the highest value // Loop over all images in the set except the closest one // and initialize to the furthest away image int second_closest_id = 0; for (int i = 1; i < nimages_strongLensing[source_id] && (i != image_index); i++) { if(image_dist[i] > image_dist[second_closest_id]) second_closest_id=i; } /////////////////////////////////////////////////////////////// // Loop over all images in the set that are not yet allocated to a theoretical image // and allocate the closest one // we search for an observed image not already linked (nimagesfound=0) for(int i = 0; i < nimages_strongLensing[source_id] && nimagesfound[source_id][image_id][i] == 0; i++) { if(image_dist[i] < image_dist[second_closest_id]) { second_closest_id = i; // im_index value changes only if we found a not linked yet image image_index = i; //printf("tim3 %d %d = %f %f\n", image_id, image_index, temp.x, temp.y); //#pragma omp critical tim[image_id][image_index] = temp; // if we found an observed and not already linked image, we allocate the theoretical image temp } } // increasing the total number of images found (If we find more than 1 theoretical image linked to 1 real image, // these theoretical //#pragma omp atomic nimagesfound[source_id][image_id][image_index]++; // images are included in this number) } } //#pragma omp atomic //loc_images_found++; //thread_found_image = 0; // for next iteration } // } //#pragma omp barrier //____________________________ end of image loop // //____________________________ computing the local chi square // double chiimage; // int _nimages = nimages_strongLensing[source_id]; // for( int iter = 0; iter < _nimages*_nimages; iter++) { int i=iter/nimages_strongLensing[source_id]; int j=iter%nimages_strongLensing[source_id]; //printf("nimagesfound %d %d = %d\n", i, j, nimagesfound[i][j]); if(i != j) { // In the current method, we get the source in the source plane by ray tracing image in nimagesfound[i][i]. If we ray trace back, // we arrive again at the same position and thus the chi2 from it is 0. Thus we do not calculate the chi2 (-> if i!=j) if(nimagesfound[source_id][i][j] > 0) { double pow1 = images[index + j].center.x - tim[i][j].x; double pow2 = images[index + j].center.y - tim[i][j].y; // //chiimage = pow(images[index + j].center.x - tim[i][j].x, 2) + pow(images[index + j].center.y - tim[i][j].y, 2); // compute the chi2 chiimage = pow1*pow1 + pow2*pow2; // compute the chi2 //printf("%d %d = %.15f\n", i, j, chiimage); *chi += chiimage; } else if(nimagesfound[source_id][i][j] == 0) { // If we do not find a correpsonding image, we add a big value to the chi2 to disfavor the model *chi += 100.*nimages_strongLensing[source_id]; } } } // //____________________________ end of computing the local chi square // //printf("%d: chi = %.15f\n", source_id, *chi); /* for (int i=0; i < nimages_strongLensing[source_id]; ++i){ for (int j=0; j < nimages_strongLensing[source_id]; ++j){ printf(" %d",nimagesfound[i][j]); } printf("\n"); }*/ //Incrementing Index: Images already treated by previous source_id index += nimages_strongLensing[source_id]; } MPI_Barrier(MPI_COMM_WORLD); // chi2_time += myseconds(); time += myseconds(); // if (verbose) { // // int nthreads = 1; // //#pragma omp parallel // nthreads = omp_get_num_threads(); // printf(" overall time = %f s.\n", time); printf(" - image time = %f s.\n", image_time); printf(" - loop time = %f s.\n", loop_time); printf(" - comm time = %f s.\n", comm_time); printf(" - chi2 time = %f s.\n", chi2_time); // printf(" images found: %d out of %ld\n", images_found, images_total); } // free(grid_gradient_x); free(grid_gradient_y); } /** @brief Tranform a point from image to source plane. Result stored in sourcepoint argument * * Tranform a point from image to source plane using lensequation * * @param image_point image position * @param dlsds dls/ds * @param nhalos number of halos * @param potential_param gravitational potential information * @param source_point address where source information will be stored * * */ void mychi_transformImageToSourcePlane(const runmode_param *runmode, const struct point *image_point, double dlsds, const struct Potential *lens, struct point *source_point) { // dlsds is the distance between lens and source divided by the distance observer-source struct point Grad; // gradient Grad = module_potentialDerivatives_totalGradient(runmode->nhalos, image_point, lens); //Grad = module_potentialDerivatives_totalGradient_SOA(image_point, lens, runmode->Nlens); source_point->x = image_point->x - dlsds*Grad.x; source_point->y = image_point->y - dlsds*Grad.y; //printf("dlsds %f", dlsds); } void mychi_transformImageToSourcePlane_SOA(const int Nlens, const struct point *image_point, double dlsds, const struct Potential_SOA *lens, struct point *source_point) { // dlsds is the distance between lens and source divided by the distance observer-source struct point Grad; // gradient Grad = module_potentialDerivatives_totalGradient_SOA(image_point, lens, Nlens); // source_point->x = image_point->x - dlsds*Grad.x; source_point->y = image_point->y - dlsds*Grad.y; //printf("dlsds %f", dlsds); } // // // inline void mychi_transformImageToSourcePlane_SOA_Packed( const struct point *image_point, double dlsds, struct point *source_point, double *grad_x, double * grad_y, int grad_id) { source_point->x = image_point->x - dlsds*grad_x[grad_id]; source_point->y = image_point->y - dlsds*grad_y[grad_id]; int world_rank; //MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); //if (world_rank == 1) //printf(" %d: %f %f = %f %f - dlsds = %f grad id = %d grad = (%f %f)\n", world_rank, source_point->x, source_point->y, image_point->x, image_point->y, dlsds, grad_id, grad_x[grad_id], grad_y[grad_id]); //printf("dlsds %f", dlsds); } /** @brief Tranform a triangle from image to source plane. Result stored in S triangle argument * * Return a triplet of points in the source plane corresponding to the triplet * of images. dlsds is the lens efficiency at the source redshift. * I is the triangle in the image plane (input), S is the same triangle in the source plane (output) * * @param I triangle in image plane * @param dlsds dls/ds * @param nhalos number of halos * @param potential_param gravitational potential information * @param S address where triangle source information will be stored * * */ inline void mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_upper( struct triplet *I, double dlsds, struct triplet *S, double *grad_x, double * grad_y, int grad_id, int nbgridcell) { mychi_transformImageToSourcePlane_SOA_Packed( &I->a, dlsds, &S->a, grad_x, grad_y, grad_id ); mychi_transformImageToSourcePlane_SOA_Packed( &I->b, dlsds, &S->b, grad_x, grad_y, grad_id + nbgridcell); mychi_transformImageToSourcePlane_SOA_Packed( &I->c, dlsds, &S->c, grad_x, grad_y, grad_id + 1); } inline void mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_lower( struct triplet *I, double dlsds, struct triplet *S, double *grad_x, double * grad_y, int grad_id, int nbgridcell) { mychi_transformImageToSourcePlane_SOA_Packed( &I->a, dlsds, &S->a, grad_x, grad_y, grad_id + nbgridcell + 1); mychi_transformImageToSourcePlane_SOA_Packed( &I->b, dlsds, &S->b, grad_x, grad_y, grad_id + 1); mychi_transformImageToSourcePlane_SOA_Packed( &I->c, dlsds, &S->c, grad_x, grad_y, grad_id + nbgridcell ); } /** @brief Return the scalar triple product (a*b).c of the 3 vectors A[x,y,1], B[x,y,1], C[x,y,1]. * If 2 of the 3 vectors are equal, colinear or form an orthogonal basis, * the triple product is 0. * This is also the determinant of the matrix * | Ax Bx Cx | * | Ay By Cy | * | 1 1 1 | */ //inline double mychi_determinant(const struct point *A, const struct point *B, const struct point *C) { return( B->x*C->y - B->y*C->x + A->x*B->y - A->y*B->x + A->y*C->x - A->x*C->y ); } /** @brief Return 1 if P is inside the triangle T, 0 otherwise. * Return 1 if P is inside the triangle T, 0 otherwise. * @param P a point * @param T a triplet of points. * * */ inline int mychi_inside(const struct point *P, struct triplet *T) { double s, s1, s2, d; d = mychi_determinant(&T->a, &T->b, &T->c); s = mychi_determinant(&T->a, &T->b, P)*d; if (s < 0.) return 0; s1 = mychi_determinant(&T->b, &T->c, P)*d; if (s1 < 0.) return 0; s2 = mychi_determinant(&T->c, &T->a, P)*d; if (s2 < 0.) return 0; return 1; return((s > 0.) && (s1 > 0.) && (s2 > 0.)); // If all determinants are positive, // the point must be inside the triangle } /* int mychi_inside2(const struct point *A, const struct point *B, const struct point *C) { // Compute vectors v0 = C - A; v1 = B - A; v2 = P - A; // Compute dot products dot00 = dot(v0, v0); dot01 = dot(v0, v1); dot02 = dot(v0, v2); dot11 = dot(v1, v1); dot12 = dot(v1, v2); // Compute barycentric coordinates invDenom = 1 / (dot00 * dot11 - dot01 * dot01); u = (dot11 * dot02 - dot01 * dot12) * invDenom; v = (dot00 * dot12 - dot01 * dot02) * invDenom; // Check if point is in triangle return (u >= 0) && (v >= 0) && (u + v < 1); } */ /** @brief Return 1 if P is inside the triangle T or on its border, 0 otherwise. * * Return 1 if P is inside the triangle T or on its border, 0 otherwise. * @param P a point * @param T a triplet of points. * * */ inline int mychi_insideborder(const struct point *P, struct triplet *T) { double s, s1, s2, d; d = mychi_determinant(&T->a, &T->b, &T->c); s = mychi_determinant(&T->a, &T->b, P)*d; if (s < 0.) return 0; s1 = mychi_determinant(&T->b, &T->c, P)*d; if (s1 < 0.) return 0; s2 = mychi_determinant(&T->c, &T->a, P)*d; if (s2 < 0.) return 0; return 1; return((s >= 0.) && (s1 >= 0.) && (s2 >= 0.)); // If all determinants are positive or 0, // the point must be inside the triangle or on its border } /** @brief Barycentre of a triplet/triangle * * A is a structure triplet that contains 3 structures point a,b and c * Return value B is a point * * */ struct point mychi_barycenter(struct triplet *A) { struct point B; B.x = (A->a.x + A->b.x + A->c.x) / 3.; B.y = (A->a.y + A->b.y + A->c.y) / 3.; return(B); } /** @brief Euclidean distance between 2 points * * Euclidean distance between 2 points * */ double mychi_dist(struct point A, struct point B) { double x, y; x = A.x - B.x; y = A.y - B.y; return(sqrt(x*x + y*y)); } diff --git a/Benchmarks/ChiBenchmark/main.cpp b/Benchmarks/ChiBenchmark/main.cpp index e417141..ec386ff 100644 --- a/Benchmarks/ChiBenchmark/main.cpp +++ b/Benchmarks/ChiBenchmark/main.cpp @@ -1,402 +1,395 @@ /** * @file main.cpp * @Author Christoph Schaaefer, EPFL (christophernstrerne.schaefer@epfl.ch) * @date October 2016 * @brief Benchmark for gradhalo function */ #include <iostream> #include <iomanip> #include <string.h> #include <math.h> #include <sys/time.h> #include <fstream> #include <sys/stat.h> #include <unistd.h> // #include <mm_malloc.h> // #include <structure_hpc.hpp> #include "timer.h" #include "gradient.hpp" #include "chi_CPU.hpp" #include "module_cosmodistances.hpp" #include "module_readParameters.hpp" #ifdef __WITH_MPI #include<mpi.h> #endif #ifdef _OPENMP #include<omp.h> #endif //#define __WITH_LENSTOOL 0 #ifdef __WITH_LENSTOOL #warning "linking with libtool..." #include <fonction.h> #include <constant.h> #include <dimension.h> #include <structure.h> #include <setup.hpp> #endif #ifdef __WITH_LENSTOOL struct g_mode M; struct g_pot P[NPOTFILE]; struct g_pixel imFrame, wFrame, ps, PSF; struct g_cube cubeFrame; struct g_dyn Dy; // //TV struct g_source S; struct g_image I; struct g_grille G; struct g_msgrid H; // multi-scale grid struct g_frame F; struct g_large L; struct g_cosmo C; struct g_cline CL; struct g_observ O; struct pot lens[NLMAX]; struct pot lmin[NLMAX], lmax[NLMAX], prec[NLMAX]; struct g_cosmo clmin, clmax; /*cosmological limits*/ struct galaxie smin[NFMAX], smax[NFMAX]; // limits on source parameters struct ipot ip; struct MCarlo mc; struct vfield vf; struct vfield vfmin,vfmax; // limits on velocity field parameters struct cline cl[NIMAX]; lensdata *lens_table; int block[NLMAX][NPAMAX]; /*switch for the lens optimisation*/ int cblock[NPAMAX]; /*switch for the cosmological optimisation*/ int sblock[NFMAX][NPAMAX]; /*switch for the source parameters*/ int vfblock[NPAMAX]; /*switch for the velocity field parameters*/ double excu[NLMAX][NPAMAX]; double excd[NLMAX][NPAMAX]; /* supplments tableaux de valeurs pour fonctions g pour Einasto * * Ce sont trois variables globales qu'on pourra utiliser dans toutes les fonctions du projet * */ #define CMAX 20 #define LMAX 80 float Tab1[LMAX][CMAX]; float Tab2[LMAX][CMAX]; float Tab3[LMAX][CMAX]; int nrline, ntline, flagr, flagt; long int narclet; struct point gimage[NGGMAX][NGGMAX], gsource_global[NGGMAX][NGGMAX]; struct biline radial[NMAX], tangent[NMAX]; struct galaxie arclet[NAMAX], source[NFMAX], image[NFMAX][NIMAX]; struct galaxie cimage[NFMAX]; struct pointgal gianti[NPMAX][NIMAX]; struct point SC; double elix; double alpha_e; double *v_xx; double *v_yy; double **map_p; double **tmp_p; double **map_axx; double **map_ayy; #endif void chi_bruteforce_SOA_CPU_grid_gradient(double *chi, int *error, runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, galaxy *images); int module_readCheckInput_readInput(int argc, char *argv[]) { /// check if there is a correct number of arguments, and store the name of the input file in infile char* infile; struct stat file_stat; // If we do not have 3 arguments, stop if ( argc != 3 ) { fprintf(stderr, "\nUnexpected number of arguments\n"); fprintf(stderr, "\nUSAGE:\n"); fprintf(stderr, "lenstool input_file output_directorypath [-n]\n\n"); exit(-1); } else if ( argc == 3 ) infile=argv[1]; std::ifstream ifile(infile,std::ifstream::in); // Open the file int ts = (int) time (NULL); char buffer[10]; std::stringstream ss; ss << ts; std::string trimstamp = ss.str(); // std::string outdir = argv[2]; outdir += "-"; outdir += trimstamp; std::cout << "output directory: " << outdir << std::endl; // check whether the output directory already exists if (stat(outdir.c_str(), &file_stat) < 0){ mkdir(outdir.c_str(), S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH ); } else { printf("Error : Directory %s already exists. Specify a non existing directory.\n",argv[2]); exit(-1); } // check whether the input file exists. If it could not be opened (ifile = 0), it does not exist if(ifile){ ifile.close(); } else{ printf("The file %s does not exist, please specify a valid file name\n",infile); exit(-1); } return 0; } int main(int argc, char *argv[]) { int world_size = 1; int world_rank = 0; #ifdef __WITH_MPI MPI_Init(NULL, NULL); MPI_Comm_size(MPI_COMM_WORLD, &world_size); MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); char processor_name[MPI_MAX_PROCESSOR_NAME]; int name_len; MPI_Get_processor_name(processor_name, &name_len); MPI_Barrier(MPI_COMM_WORLD); // Print off a hello world message #endif int numthreads = 1; #ifdef _OPENMP #warning "using openmp" #pragma omp parallel numthreads = omp_get_num_threads(); #endif // printf("Hello world from processor %s, rank %d out of %d processors and %d threads per rank\n", processor_name, world_rank, world_size, numthreads); fflush(stdout); #ifdef __WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif int verbose = (world_rank == 0); // if (verbose) printf("Lenstool-HPC\n\n"); // double wallclock = myseconds(); if (world_rank == 0) printf("Reading parameter file at time %f s...\n", myseconds() - wallclock); // Setting Up the problem //=========================================================================================================== // This module function reads the terminal input when calling LENSTOOL and checks that it is correct // Otherwise it exits LENSTOOL if (world_rank == 0) module_readCheckInput_readInput(argc, argv); // This module function reads the cosmology parameters from the parameter file // Input: struct cosmologicalparameters cosmology, parameter file // Output: Initialized cosmology struct cosmo_param cosmology; // Cosmology struct to store the cosmology data from the file std::string inputFile = argv[1]; // Input file module_readParameters_readCosmology(inputFile, cosmology); // This module function reads the runmode paragraph and the number of sources, arclets, etc. in the parameter file. // The runmode_param stores the information of what exactly the user wants to do with lenstool. struct runmode_param runmode; module_readParameters_readRunmode(inputFile, &runmode); if (world_rank == 0) { module_readParameters_debug_cosmology(runmode.debug, cosmology); module_readParameters_debug_runmode(runmode.debug, runmode); } //=== Declaring variables struct grid_param frame; struct galaxy images[runmode.nimagestot]; struct galaxy sources[runmode.nsets]; struct Potential lenses[runmode.nhalos+runmode.npotfile-1]; struct Potential_SOA lenses_SOA_table[NTYPES]; struct Potential_SOA lenses_SOA; struct cline_param cline; struct potfile_param potfile; struct Potential potfilepotentials[runmode.npotfile]; struct potentialoptimization host_potentialoptimization[runmode.nhalos]; int nImagesSet[runmode.nsets]; // Contains the number of images in each set of imagesnano // This module function reads in the potential form and its parameters (e.g. NFW) // Input: input file // Output: Potentials and its parameters module_readParameters_Potential(inputFile, lenses, runmode.nhalos); //Converts to SOA //module_readParameters_PotentialSOA(inputFile, lenses, lenses_SOA, runmode.Nlens); module_readParameters_PotentialSOA(inputFile, lenses, &lenses_SOA, runmode.nhalos); //module_readParameters_PotentialSOA_nonsorted(inputFile, lenses, &lenses_SOA_nonsorted, runmode.nhalos); //@@module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos); //std::cerr << lenses_SOA[1].b0[0] << " " << lenses[0].b0 << std::endl; // This module function reads in the potfiles parameters // Input: input file // Output: Potentials from potfiles and its parameters if (runmode.potfile == 1 ){ module_readParameters_readpotfiles_param(inputFile, &potfile,cosmology); module_readParameters_debug_potfileparam(runmode.debug, &potfile); module_readParameters_readpotfiles(&runmode,&potfile,lenses); module_readParameters_debug_potential(runmode.debug, lenses, runmode.nhalos+runmode.npotfile); } // This module function reads in the grid form and its parameters // Input: input file // Output: grid and its parameters -#if 1 module_readParameters_Grid(inputFile, &frame); if (runmode.image == 1 or runmode.inverse == 1 or runmode.time > 0){ // This module function reads in the strong lensing images module_readParameters_readImages(&runmode, images, nImagesSet); //runmode.nsets = runmode.nimagestot; for(int i = 0; i < runmode.nimagestot; ++i){ images[i].dls = module_cosmodistances_objectObject(lenses[0].z, images[i].redshift, cosmology); images[i].dos = module_cosmodistances_observerObject(images[i].redshift, cosmology); images[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, images[i].redshift, cosmology); } //module_readParameters_debug_image(runmode.debug, images, nImagesSet,runmode.nsets); } if (runmode.inverse == 1){ // This module function reads in the potential optimisation limits module_readParameters_limit(inputFile,host_potentialoptimization,runmode.nhalos); module_readParameters_debug_limit(runmode.debug, host_potentialoptimization[0]); } if (runmode.source == 1) { //Initialisation to default values.(Setting sources to z = 1.5 default value) for(int i = 0; i < runmode.nsets; ++i) { sources[i].redshift = 1.5; } // This module function reads in the strong lensing sources module_readParameters_readSources(&runmode, sources); //Calculating cosmoratios for(int i = 0; i < runmode.nsets; ++i) { sources[i].dls = module_cosmodistances_objectObject(lenses[0].z, sources[i].redshift, cosmology); sources[i].dos = module_cosmodistances_observerObject(sources[i].redshift, cosmology); sources[i].dr = module_cosmodistances_lensSourceToObserverSource(lenses[0].z, sources[i].redshift, cosmology); } module_readParameters_debug_source(runmode.debug, sources, runmode.nsets); } #ifdef __WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif // -<<<<<<< HEAD - std::cout << "--------------------------" << std::endl << std::endl; -#endif - -======= if (verbose) std::cout << "--------------------------" << std::endl << std::endl; // ->>>>>>> lenstool_mpi // Lenstool Bruteforce //=========================================================================================================== if (verbose) { #ifdef __WITH_LENSTOOL { printf("Calling lenstool at time %f s\n", myseconds() - wallclock); setup_lenstool(); // double chi2; double lhood0(0); int error(0); double time; if ( M.ichi2 != 0 ) { int error; //NPRINTF(stdout, "INFO: compute chires.dat\n"); readConstraints(); o_chires("chires.dat"); time = -myseconds(); error = o_chi_lhood0(&chi2, &lhood0, NULL); time += myseconds(); printf("INFO: chi2 %lf Lhood %lf\n", chi2, -0.5 * ( chi2 + lhood0 ) ); o_global_free(); } std::cout << "Lenstool 6.8.1 chi Benchmark: "; std::cout << " Chi : " << std::setprecision(15) << chi2 ; std::cout << " Time " << std::setprecision(15) << time << std::endl; o_global_free(); } #endif } // Lenstool-GPU Bruteforce //=========================================================================================================== #if 0 { printf("Calling lenstoolhpc orig at time %f s\n", myseconds() - wallclock); std::cout << "LenstoolHPC dist chi Benchmark:\n "; double chi2; double time; int error; time = -myseconds(); mychi_bruteforce_SOA_CPU_grid_gradient_orig(&chi2, &error, &runmode, &lenses_SOA, &frame, nImagesSet, images); time += myseconds(); std::cout << " Chi : " << std::setprecision(15) << chi2; std::cout << " Time " << std::setprecision(15) << time << std::endl; } #endif #if 0 { //std::cout << "MylenstoolHPC chi Benchmark:\n "; if (verbose) printf("Calling lenstoolhpc at time %f s\n", myseconds() - wallclock); double chi2; double time; int error; time = -myseconds(); mychi_bruteforce_SOA_CPU_grid_gradient(&chi2, &error, &runmode, &lenses_SOA, &frame, nImagesSet, images); time += myseconds(); if (verbose) { std::cout << " Chi : " << std::setprecision(15) << chi2; std::cout << " Time " << std::setprecision(15) << time << std::endl; } } #endif if (verbose) printf("Ending execution at time %f s\n", myseconds() - wallclock); #ifdef __WITH_MPI MPI_Finalize(); #endif }