diff --git a/src/chi_CPU_barycenter.cpp b/src/chi_CPU_barycenter.cpp index b28f8ae..77c039a 100644 --- a/src/chi_CPU_barycenter.cpp +++ b/src/chi_CPU_barycenter.cpp @@ -1,544 +1,287 @@ #include #include +//#include #include #include #include #include #include #ifndef __xlC__ #warning "gnu compilers" #include #endif //#include "simd_math.h" #include "chi_CPU.hpp" //#ifdef __WITH_GPU #include "grid_gradient_GPU.cuh" ////#endif //#include "gradient_GPU.cuh" #ifdef _OPENMP #include #endif #ifdef __WITH_MPI #warning "MPI enabled" #include #include "mpi_check.h" #endif -// + #include "delense_CPU_utils.hpp" #include "delense_CPU.hpp" -// +#include "delense_GPU.cuh" +#include "chi_comm.hpp" +#include "chi_computation.hpp" #ifdef __WITH_GPU #include -#include "delense_GPU.cuh" #include "cudafunctions.cuh" #endif #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) // // // double myseconds() { struct timeval tp; struct timezone tzp; int i; i = gettimeofday(&tp,&tzp); return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); } void mychi_bruteforce_SOA_CPU_grid_gradient_barycentersource(double *chi, int *error, runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, galaxy *images) { // //double dx, dy; //, x_pos, y_pos; //pixelsize // // double im_dist[MAXIMPERSOURCE]; // distance to a real image for an "theoretical" image found from a theoretical source // int im_index; // index of the closest real image for an image found from a theoretical source // int second_closest_id; // index of the second closest real image for an image found from a theoretical source // int thread_found_image = 0; // at each iteration in the lens plane, turn to 1 whether the thread find an image // struct point im_position, temp; // position of the image found from a theoretical source + temp variable for comparison // struct triplet Tsup, Tinf, Tsupsource, Tinfsource;// triangles for the computation of the images created by the theoretical sources + // unsigned int nsets = runmode->nsets; // struct galaxy sources[nsets]; // theoretical sources (common for a set) int nimagesfound [nsets][MAXIMPERSOURCE]; // number of images found from the theoretical sources int locimagesfound[nsets]; //int* locimagesfound = (int*) malloc(nsets*sizeof(int)); struct point image_pos [nsets][MAXIMPERSOURCE]; //struct point* image_pos = (struct point*) malloc(nsets*MAXIMPERSOURCE*sizeof(struct point)); //double image_dist [nsets][runmode->nimagestot][MAXIMPERSOURCE]; struct point tim [MAXIMPERSOURCE]; // theoretical images (computed from sources) // int world_size = 1; int world_rank = 0; #ifdef __WITH_MPI MPI_Comm_size(MPI_COMM_WORLD, &world_size); MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); MPI_Barrier(MPI_COMM_WORLD); #endif unsigned int verbose = (world_rank == 0); // int grid_size = runmode->nbgridcells; int loc_grid_size = runmode->nbgridcells/world_size; // double y_len = fabs(frame->ymax - frame->ymin); int y_len_loc = runmode->nbgridcells/world_size; int y_pos_loc = (int) world_rank*y_len_loc; int y_bound = y_len_loc; // if ((world_rank + 1) != world_size) y_bound++; // // const double dx = (frame->xmax - frame->xmin)/(runmode->nbgridcells - 1); const double dy = (frame->ymax - frame->ymin)/(runmode->nbgridcells - 1); // double *grid_gradient_x, *grid_gradient_y; // //grid_gradient_x = (double *)malloc((int) grid_size*loc_grid_size*sizeof(double)); //grid_gradient_y = (double *)malloc((int) grid_size*loc_grid_size*sizeof(double)); //grid_gradient_x = (double *)malloc((int) grid_size*y_bound*sizeof(double)); //grid_gradient_y = (double *)malloc((int) grid_size*y_bound*sizeof(double)); #ifdef __WITH_GPU cudaMallocManaged(&grid_gradient_x, grid_size*y_bound*sizeof(double)); cudasafe(cudaGetLastError(), "grid_gradient_x allocation"); cudaMallocManaged(&grid_gradient_y, grid_size*y_bound*sizeof(double)); cudasafe(cudaGetLastError(), "grid_gradient_y allocation"); #else grid_gradient_x = (double *)malloc((int) grid_size*y_bound*sizeof(double)); grid_gradient_y = (double *)malloc((int) grid_size*y_bound*sizeof(double)); #endif - - // - //uais - //if (verbose) printf("@@%d: nsets = %d nimagestot = %d maximgpersource = %d, grid_size = %d, loc_grid_size = %d, y_pos_loc = %d\n", world_rank, nsets, runmode->nimagestot, MAXIMPERSOURCE, grid_size, loc_grid_size, y_pos_loc); // const int grid_dim = runmode->nbgridcells; //Packaging the image to sourceplane conversion double time = -myseconds(); double grad_time = -myseconds(); //gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_dim); #ifdef __WITH_GPU //@@printf("Using GPUs...\n"); fflush(stdout); #ifdef __WITH_UM //@@printf("Unified memory...\n"); fflush(stdout); gradient_grid_GPU_UM(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, 0, y_pos_loc); #else gradient_grid_GPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, 0, y_pos_loc); #endif //gradient_grid_GPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_size); #else int zero = 0; gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, zero, y_pos_loc); #endif //gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_size, loc_grid_size); #ifdef __WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif grad_time += myseconds(); // int index = 0; // index tracks the image within the total image array in the image plane *chi = 0; // double chi2_time = 0.; double delense_time = 0.; double image_time = 0.; // int images_found = 0; long int images_total = 0; // //printf("@@nsets = %d nx = %d ny = %d, xmin = %f, dx = %f, ymin = %f, dy = %f\n", nsets, runmode->nbgridcells, runmode->nbgridcells, frame->xmin, dx, frame->ymin, dy ); // int numsets = 0; // for( int source_id = 0; source_id < nsets; source_id ++) numsets += nimages_strongLensing[source_id]; - //printf("@@Total numsets = %d\n", numsets); // time = -myseconds(); // // nsets : number of images in the source plane // nimagestot: number of images in the image plane // + // + // Image Lensing + // image_time -= myseconds(); // //unsigned int nsets = nsets; + // for( int source_id = 0; source_id < nsets; source_id ++) { // number of images in the image plane for the specific image (1,3,5...) unsigned short int nimages = nimages_strongLensing[source_id]; //@@printf("@@ source_id = %d, nimages = %d\n", source_id, nimages_strongLensing[source_id]); //Init sources sources[source_id].center.x = sources[source_id].center.y = 0.; //____________________________ image (constrains) loop ________________________________ for(unsigned short int image_id = 0; image_id < nimages; image_id++) { - //printf("@@ nimages = %d\n", nimages_strongLensing[source_id]); - //________ computation of theoretical sources _________ - // output: sources[source_id].center - //printf("Image = %f %f\n", images[index + image_id].center.x, images[index + image_id].center.y); - //mychi_transformImageToSourcePlane_SOA(runmode->nhalos, - // &images[index + image_id].center, - // images[index + image_id].dr, - // lens, - // &sources[source_id][image_id].center); - // - // void mychi_transformImageToSourcePlane_SOA(const int Nlens, const struct point *image_point, double dlsds, const struct Potential_SOA *lens, struct point *source_point) // struct galaxy sources_temp; struct point Grad = module_potentialDerivatives_totalGradient_SOA(&images[index + image_id].center, lens, runmode->nhalos); - //printf(" image %d, %d (%d) = (%.15f, %.15f) -> (%.15f, %.15f)\n", source_id, image_id, nimages, images[index + image_id].center.x, images[index + image_id].center.y, sources[source_id].center.x, sources[source_id].center.y ); - //printf("Grad.x = %f, %f\n", Grad.x, grid_gradient_x[int(images[index + image_id].center.x/dx)]); // // find the position of the constrain in the source plane sources_temp.center.x = images[index + image_id].center.x - images[index + image_id].dr*Grad.x; sources_temp.center.y = images[index + image_id].center.y - images[index + image_id].dr*Grad.y; - //printf(" image %d, %d (%d) = (%.15f, %.15f) -> (%.15f, %.15f)\n", source_id, image_id, nimages, images[index + image_id].center.x, images[index + image_id].center.y, sources_temp.center.x, sources_temp.center.y ); //________ Adding up for barycenter comp _________ sources[source_id].center.x += sources_temp.center.x; sources[source_id].center.y += sources_temp.center.y; //time += myseconds(); sources[source_id].redshift = 0.; sources[source_id].shape.a = sources[source_id].shape.b = sources[source_id].shape.theta = (type_t) 0.; sources[source_id].mag = 0.; // } //________ Dividing by nimages for final barycenter result _________ sources[source_id].center.x /= nimages; sources[source_id].center.y /= nimages; sources[source_id].redshift = images[index].redshift; sources[source_id].dr = images[index].dr; sources[source_id].dls = images[index].dls; sources[source_id].dos = images[index].dos; //#if 1 index += nimages; } #ifdef __WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif image_time += myseconds(); // - // main loop + // Delensing // delense_time -= myseconds(); index = 0; int numimg = 0; #ifndef __WITH_GPU delense_barycenter(&image_pos[0][0], &locimagesfound[0], &numimg, runmode, lens, frame, nimages_strongLensing, sources, grid_gradient_x, grid_gradient_y); #else //delense_barycenter_GPU(&image_pos[0][0], &locimagesfound[0], &numimg, runmode, lens, frame, nimages_strongLensing, sources, grid_gradient_x, grid_gradient_y); //struct point* ip[MAXIMPERSOURCE] = image_pos; delense_barycenter_GPU(&image_pos[0][0], &locimagesfound[0], &numimg, runmode, lens, frame, nimages_strongLensing, sources, grid_gradient_x, grid_gradient_y); printf("done. numimg = %d\n", numimg);fflush(stdout); #endif + // + // Communications + // #ifdef __WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif delense_time += myseconds(); // double comm_time = -myseconds(); // int numimagesfound [nsets]; - memset(&numimagesfound, 0, nsets*sizeof(int)); struct point imagesposition [nsets][MAXIMPERSOURCE]; - memset(&imagesposition, 0, nsets*MAXIMPERSOURCE*sizeof(point)); // - int numimagesfound_tmp[nsets]; - struct point imagesposition_tmp[nsets][MAXIMPERSOURCE]; - // - memset(numimagesfound_tmp, 0, nsets*sizeof(int)); - memset(imagesposition_tmp, 0, nsets*sizeof(int)); + memset(&numimagesfound, 0, nsets*sizeof(int)); + memset(&imagesposition, 0, nsets*MAXIMPERSOURCE*sizeof(point)); // -#ifdef __WITH_MPI - int total = 0; + delense_comm(&numimagesfound[0], &imagesposition[0][0], &numimg, nimages_strongLensing, &locimagesfound[0], &image_pos[0][0], nsets, world_rank, world_size); // - if (!verbose) - { - MPI_CHECK(MPI_Send( &numimg, 1, MPI_INT , 0, 666 + world_rank, MPI_COMM_WORLD )); - if (numimg != 0) - { - MPI_CHECK(MPI_Send( &locimagesfound, nsets , MPI_INT , 0, 666 + world_rank, MPI_COMM_WORLD )); - MPI_CHECK(MPI_Send( &image_pos, nsets*MAXIMPERSOURCE*2, MPI_DOUBLE, 0, 667 + world_rank, MPI_COMM_WORLD )); - } - } - // - if (verbose) - { - int image_sum = 0; - // - for (int ipe = 0; ipe < world_size; ++ipe) - { - MPI_Status status; - // - if (ipe == 0) - { - memcpy(&numimagesfound_tmp, &locimagesfound, nsets*sizeof(int)); - memcpy(&imagesposition_tmp, &image_pos, nsets*MAXIMPERSOURCE*sizeof(point)); - } - else - { - MPI_CHECK(MPI_Recv(&numimg , 1 , MPI_INT , ipe, 666 + ipe, MPI_COMM_WORLD, &status)); - if (numimg != 0) - { - MPI_CHECK(MPI_Recv(&numimagesfound_tmp, nsets , MPI_INT , ipe, 666 + ipe, MPI_COMM_WORLD, &status)); - MPI_CHECK(MPI_Recv(&imagesposition_tmp, nsets*MAXIMPERSOURCE*2, MPI_DOUBLE, ipe, 667 + ipe, MPI_COMM_WORLD, &status)); - } - } - // - //MPI_Reduce(&imagesfound_tmp, &total, ipe, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - // - if (numimg != 0) - { - //for (int jj = 0; jj < runmode->nimagestot; ++jj) - //{ - int loc_numimg = 0; - int loc_numimg_s = 0; - for (int ii = 0; ii < nsets; ++ii) - { - //int img_len = numimagesfound[ii][jj]; - int img_len = numimagesfound_tmp[ii]; - image_sum += img_len; - if (img_len != 0) - { - int loc_length = numimagesfound[ii]; - memcpy(&imagesposition[ii][loc_length], &imagesposition_tmp[ii], img_len*sizeof(point)); - numimagesfound[ii] += img_len; - loc_numimg += img_len; - loc_numimg_s += nimages_strongLensing[ii]; - //printf("%d: %d , img_len = %d, source = %d, sum = %d %d\n", ipe, ii, numimagesfound_tmp[ii], nimages_strongLensing[ii], loc_numimg, loc_numimg_s); - - } - } - } - } - printf(" Total number of images found = %d\n", numimg); - } - MPI_Barrier(MPI_COMM_WORLD); comm_time += myseconds(); -#endif // // image extraction to compute // - //Updated up to here!!!!!!!!!!!!!! - // chi2_time = -myseconds(); - index = 0; - // if (verbose) - for( int source_id = 0; source_id < nsets; source_id ++) - { - unsigned short int nimages = nimages_strongLensing[source_id]; - // - //______________________Initialisation______________________ - // - for (unsigned short int i = 0; i < nimages; ++i) - nimagesfound[source_id][i] = 0; - // - //for( unsigned short int image_id = 0; image_id < nimages; image_id++) - //{ -//#endif //collapsing - // - double image_dist[MAXIMPERSOURCE]; - // - //printf(" Image %d, number of sources found %d\n", image_id, loc_images_found); - // - struct point image_position; - int image_index; - // - for (int ii = 0; ii < /*loc*/numimagesfound[source_id]; ++ii) - { - // - // - int image_index = 0; - // - image_position = imagesposition[source_id][ii]; - image_dist[0] = mychi_dist(image_position, images[index + 0].center); // get the distance to the real image - for(int i = 1; i < nimages_strongLensing[source_id]; i++) - { // get the distance to each real image and keep the index of the closest real image - - image_dist[i] = mychi_dist(image_position, images[index + i].center); - if (image_dist[i] < image_dist[image_index]) - { - image_index = i; - } - } - // - // we should exit loops here - // - // p1_time += myseconds(); - // - int skip_image = 0; - // Sometimes due to the numerical errors at the centerpoint, - // for SIE potentials an additional image will appear at the center of the Potential. - // This is due to the fact that it is not possible to simulate an infinity value - // at the center correctly, Check that sis correspond to Nlens[0] - /* - for (int iterator = 0; iterator < runmode->Nlens[0]; ++iterator) - { - if ( fabs(image_position.x - lens[0].position_x[iterator]) <= dx/2. and fabs(image_position.y - lens[0].position_y[iterator]) <= dx/2.) - { - skip_image = 1; - printf("WARNING: You are using SIE potentials. An image to close to one of the potential centers has been classified as numerical error and removed \n"); - } - }*/ - //printf("%d %d %d %d\n", x_id, y_id,thread_found_image, skip_image); - if (!skip_image) - { - //#pragma omp atomic - images_found++; - struct point temp; - //printf(" source %d, image %d, index %d, Images found: %d\n", source_id, image_id, image_index, nimagesfound[source_id][image_id][image_index]); - //checking whether a closest image has already been found - if (nimagesfound[source_id][image_index] == 0) - { // if no image found up to now - - //image position is allocated to theoretical image - //#pragma omp critical - tim[image_index] = image_position; - //#pragma omp atomic - nimagesfound[source_id][image_index]++; - } - else if (nimagesfound[source_id][image_index] > 0) - { // if we have already found an image - // If the new image we found is closer than the previous image - //printf(" tim2: %f %f\n", image_dist[image_index], mychi_dist(images[index + image_index].center, tim[image_id][image_index])); - if (image_dist[image_index] < mychi_dist(images[index + image_index].center, tim[image_index])) - { - temp = tim[image_index]; // we store the position of the old image in temp - //#pragma omp critical - tim[image_index] = image_position; // we link the observed image with the image we just found - //printf("tim2 %d %d = %f %f\n", image_id, image_index, image_position.x, image_position.y); - } - else - { - temp = image_position; // we store the position of the image we just found in temp - } - // initialising second_closest_id to the highest value - // Loop over all images in the set except the closest one - // and initialize to the furthest away image - int second_closest_id = 0; - for (int i = 1; i < nimages_strongLensing[source_id] && (i != image_index); i++) - { - if(image_dist[i] > image_dist[second_closest_id]) second_closest_id=i; - } - /////////////////////////////////////////////////////////////// - // Loop over all images in the set that are not yet allocated to a theoretical image - // and allocate the closest one - // we search for an observed image not already linked (nimagesfound=0) - for(int i = 0; i < nimages_strongLensing[source_id] && nimagesfound[source_id][i] == 0; i++) - { - if(image_dist[i] < image_dist[second_closest_id]) - { - second_closest_id = i; - // im_index value changes only if we found a not linked yet image - image_index = i; - //printf("tim3 %d %d = %f %f\n", image_id, image_index, temp.x, temp.y); - //#pragma omp critical - tim[image_index] = temp; // if we found an observed and not already linked image, we allocate the theoretical image temp - } - } - // increasing the total number of images found (If we find more than 1 theoretical image linked to 1 real image, - // these theoretical - //#pragma omp atomic - nimagesfound[source_id][image_index]++; - // images are included in this number) - } - } - //#pragma omp atomic - //loc_images_found++; - //thread_found_image = 0; // for next iteration - } - // - //} - //#pragma omp barrier - //____________________________ end of image loop - // - //____________________________ computing the local chi square - // - double chiimage; - // - int _nimages = nimages_strongLensing[source_id]; - // - for( int iter = 0; iter < _nimages; iter++) - { - int i = iter; - //int i=iter/nimages_strongLensing[source_id]; - //int j=iter%nimages_strongLensing[source_id]; - - //printf("nimagesfound %d %d = %d\n", i, j, nimagesfound[i][j]); - //if(i != j) - //{ - if(nimagesfound[source_id][i] > 0) - { - double pow1 = images[index + i].center.x - tim[i].x; - double pow2 = images[index + i].center.y - tim[i].y; - // - //chiimage = pow(images[index + j].center.x - tim[i][j].x, 2) + pow(images[index + j].center.y - tim[i][j].y, 2); // compute the chi2 - chiimage = pow1*pow1 + pow2*pow2; // compute the chi2 - //printf("%d %d = %.15f\n", i, j, chiimage); - *chi += chiimage; - } - else - if(nimagesfound[source_id][i] == 0) - { - // If we do not find a correpsonding image, we add a big value to the chi2 to disfavor the model - *chi += 1000.*nimages_strongLensing[source_id]; - printf(" source_id = %d, image number = %d has not found an image\n", source_id, i); - } - //} - } - // - //____________________________ end of computing the local chi square - // - //printf("%d: chi = %.15f\n", source_id, *chi); - /* - for (int i=0; i < nimages_strongLensing[source_id]; ++i){ - for (int j=0; j < nimages_strongLensing[source_id]; ++j){ - printf(" %d",nimagesfound[i][j]); - } - printf("\n"); - }*/ + chi_computation(chi, &images_found, &numimagesfound[0], &imagesposition[0][0], nimages_strongLensing, images, nsets); - //Incrementing Index: Images already treated by previous source_id - index += nimages_strongLensing[source_id]; - } -#ifdef __WITH_MPI +#ifdef __WITH_GPU MPI_Barrier(MPI_COMM_WORLD); #endif // chi2_time += myseconds(); time += myseconds(); // if (verbose) { // // int nthreads = 1; // //#pragma omp parallel // nthreads = omp_get_num_threads(); // printf(" overall time = %f s.\n", time); printf(" - grad time = %f s.\n", grad_time); printf(" - image time = %f s.\n", image_time); printf(" - delense time = %f s.\n", delense_time); printf(" - comm time = %f s.\n", comm_time); printf(" - chi2 time = %f s.\n", chi2_time); // printf(" images found: %d out of %ld\n", images_found, images_total); } // #ifdef __WITH_GPU cudaFree(grid_gradient_x); cudaFree(grid_gradient_y); #else free(grid_gradient_x); free(grid_gradient_y); #endif } diff --git a/src/chi_barycenter_GPU.cu b/src/chi_barycenter_GPU.cu index e0ceddc..16fb454 100644 --- a/src/chi_barycenter_GPU.cu +++ b/src/chi_barycenter_GPU.cu @@ -1,544 +1,287 @@ #include #include +//#include #include #include #include #include #include #ifndef __xlC__ #warning "gnu compilers" #include #endif //#include "simd_math.h" #include "chi_CPU.hpp" //#ifdef __WITH_GPU #include "grid_gradient_GPU.cuh" ////#endif //#include "gradient_GPU.cuh" #ifdef _OPENMP #include #endif #ifdef __WITH_MPI #warning "MPI enabled" #include #include "mpi_check.h" #endif -// + #include "delense_CPU_utils.hpp" #include "delense_CPU.hpp" -// +#include "delense_GPU.cuh" +#include "chi_comm.hpp" +#include "chi_computation.hpp" #ifdef __WITH_GPU #include -#include "delense_GPU.cuh" #include "cudafunctions.cuh" #endif #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) // // // double myseconds() { struct timeval tp; struct timezone tzp; int i; i = gettimeofday(&tp,&tzp); return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); } void mychi_bruteforce_SOA_GPU_grid_gradient_barycentersource(double *chi, int *error, runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, galaxy *images) { // //double dx, dy; //, x_pos, y_pos; //pixelsize // // double im_dist[MAXIMPERSOURCE]; // distance to a real image for an "theoretical" image found from a theoretical source // int im_index; // index of the closest real image for an image found from a theoretical source // int second_closest_id; // index of the second closest real image for an image found from a theoretical source // int thread_found_image = 0; // at each iteration in the lens plane, turn to 1 whether the thread find an image // struct point im_position, temp; // position of the image found from a theoretical source + temp variable for comparison // struct triplet Tsup, Tinf, Tsupsource, Tinfsource;// triangles for the computation of the images created by the theoretical sources + // unsigned int nsets = runmode->nsets; // struct galaxy sources[nsets]; // theoretical sources (common for a set) int nimagesfound [nsets][MAXIMPERSOURCE]; // number of images found from the theoretical sources int locimagesfound[nsets]; //int* locimagesfound = (int*) malloc(nsets*sizeof(int)); struct point image_pos [nsets][MAXIMPERSOURCE]; //struct point* image_pos = (struct point*) malloc(nsets*MAXIMPERSOURCE*sizeof(struct point)); //double image_dist [nsets][runmode->nimagestot][MAXIMPERSOURCE]; struct point tim [MAXIMPERSOURCE]; // theoretical images (computed from sources) // int world_size = 1; int world_rank = 0; #ifdef __WITH_MPI MPI_Comm_size(MPI_COMM_WORLD, &world_size); MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); MPI_Barrier(MPI_COMM_WORLD); #endif unsigned int verbose = (world_rank == 0); // int grid_size = runmode->nbgridcells; int loc_grid_size = runmode->nbgridcells/world_size; // double y_len = fabs(frame->ymax - frame->ymin); int y_len_loc = runmode->nbgridcells/world_size; int y_pos_loc = (int) world_rank*y_len_loc; int y_bound = y_len_loc; // if ((world_rank + 1) != world_size) y_bound++; // // const double dx = (frame->xmax - frame->xmin)/(runmode->nbgridcells - 1); const double dy = (frame->ymax - frame->ymin)/(runmode->nbgridcells - 1); // double *grid_gradient_x, *grid_gradient_y; // //grid_gradient_x = (double *)malloc((int) grid_size*loc_grid_size*sizeof(double)); //grid_gradient_y = (double *)malloc((int) grid_size*loc_grid_size*sizeof(double)); //grid_gradient_x = (double *)malloc((int) grid_size*y_bound*sizeof(double)); //grid_gradient_y = (double *)malloc((int) grid_size*y_bound*sizeof(double)); #ifdef __WITH_GPU cudaMallocManaged(&grid_gradient_x, grid_size*y_bound*sizeof(double)); cudasafe(cudaGetLastError(), "grid_gradient_x allocation"); cudaMallocManaged(&grid_gradient_y, grid_size*y_bound*sizeof(double)); cudasafe(cudaGetLastError(), "grid_gradient_y allocation"); #else grid_gradient_x = (double *)malloc((int) grid_size*y_bound*sizeof(double)); grid_gradient_y = (double *)malloc((int) grid_size*y_bound*sizeof(double)); #endif - - // - //uais - //if (verbose) printf("@@%d: nsets = %d nimagestot = %d maximgpersource = %d, grid_size = %d, loc_grid_size = %d, y_pos_loc = %d\n", world_rank, nsets, runmode->nimagestot, MAXIMPERSOURCE, grid_size, loc_grid_size, y_pos_loc); // const int grid_dim = runmode->nbgridcells; //Packaging the image to sourceplane conversion double time = -myseconds(); double grad_time = -myseconds(); //gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_dim); #ifdef __WITH_GPU //@@printf("Using GPUs...\n"); fflush(stdout); #ifdef __WITH_UM //@@printf("Unified memory...\n"); fflush(stdout); gradient_grid_GPU_UM(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, 0, y_pos_loc); #else gradient_grid_GPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, 0, y_pos_loc); #endif //gradient_grid_GPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_size); #else int zero = 0; gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, zero, y_pos_loc); #endif //gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_size, loc_grid_size); #ifdef __WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif grad_time += myseconds(); // int index = 0; // index tracks the image within the total image array in the image plane *chi = 0; // double chi2_time = 0.; double delense_time = 0.; double image_time = 0.; // int images_found = 0; long int images_total = 0; // //printf("@@nsets = %d nx = %d ny = %d, xmin = %f, dx = %f, ymin = %f, dy = %f\n", nsets, runmode->nbgridcells, runmode->nbgridcells, frame->xmin, dx, frame->ymin, dy ); // int numsets = 0; // for( int source_id = 0; source_id < nsets; source_id ++) numsets += nimages_strongLensing[source_id]; - //printf("@@Total numsets = %d\n", numsets); // time = -myseconds(); // // nsets : number of images in the source plane // nimagestot: number of images in the image plane // + // + // Image Lensing + // image_time -= myseconds(); // //unsigned int nsets = nsets; + // for( int source_id = 0; source_id < nsets; source_id ++) { // number of images in the image plane for the specific image (1,3,5...) unsigned short int nimages = nimages_strongLensing[source_id]; //@@printf("@@ source_id = %d, nimages = %d\n", source_id, nimages_strongLensing[source_id]); //Init sources sources[source_id].center.x = sources[source_id].center.y = 0.; //____________________________ image (constrains) loop ________________________________ for(unsigned short int image_id = 0; image_id < nimages; image_id++) { - //printf("@@ nimages = %d\n", nimages_strongLensing[source_id]); - //________ computation of theoretical sources _________ - // output: sources[source_id].center - //printf("Image = %f %f\n", images[index + image_id].center.x, images[index + image_id].center.y); - //mychi_transformImageToSourcePlane_SOA(runmode->nhalos, - // &images[index + image_id].center, - // images[index + image_id].dr, - // lens, - // &sources[source_id][image_id].center); - // - // void mychi_transformImageToSourcePlane_SOA(const int Nlens, const struct point *image_point, double dlsds, const struct Potential_SOA *lens, struct point *source_point) // struct galaxy sources_temp; struct point Grad = module_potentialDerivatives_totalGradient_SOA(&images[index + image_id].center, lens, runmode->nhalos); - //printf(" image %d, %d (%d) = (%.15f, %.15f) -> (%.15f, %.15f)\n", source_id, image_id, nimages, images[index + image_id].center.x, images[index + image_id].center.y, sources[source_id].center.x, sources[source_id].center.y ); - //printf("Grad.x = %f, %f\n", Grad.x, grid_gradient_x[int(images[index + image_id].center.x/dx)]); // // find the position of the constrain in the source plane sources_temp.center.x = images[index + image_id].center.x - images[index + image_id].dr*Grad.x; sources_temp.center.y = images[index + image_id].center.y - images[index + image_id].dr*Grad.y; - //printf(" image %d, %d (%d) = (%.15f, %.15f) -> (%.15f, %.15f)\n", source_id, image_id, nimages, images[index + image_id].center.x, images[index + image_id].center.y, sources_temp.center.x, sources_temp.center.y ); //________ Adding up for barycenter comp _________ sources[source_id].center.x += sources_temp.center.x; sources[source_id].center.y += sources_temp.center.y; //time += myseconds(); sources[source_id].redshift = 0.; sources[source_id].shape.a = sources[source_id].shape.b = sources[source_id].shape.theta = (type_t) 0.; sources[source_id].mag = 0.; // } //________ Dividing by nimages for final barycenter result _________ sources[source_id].center.x /= nimages; sources[source_id].center.y /= nimages; sources[source_id].redshift = images[index].redshift; sources[source_id].dr = images[index].dr; sources[source_id].dls = images[index].dls; sources[source_id].dos = images[index].dos; //#if 1 index += nimages; } #ifdef __WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif image_time += myseconds(); // - // main loop + // Delensing // delense_time -= myseconds(); index = 0; int numimg = 0; #ifndef __WITH_GPU delense_barycenter(&image_pos[0][0], &locimagesfound[0], &numimg, runmode, lens, frame, nimages_strongLensing, sources, grid_gradient_x, grid_gradient_y); #else //delense_barycenter_GPU(&image_pos[0][0], &locimagesfound[0], &numimg, runmode, lens, frame, nimages_strongLensing, sources, grid_gradient_x, grid_gradient_y); //struct point* ip[MAXIMPERSOURCE] = image_pos; delense_barycenter_GPU(&image_pos[0][0], &locimagesfound[0], &numimg, runmode, lens, frame, nimages_strongLensing, sources, grid_gradient_x, grid_gradient_y); printf("done. numimg = %d\n", numimg);fflush(stdout); #endif + // + // Communications + // #ifdef __WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif delense_time += myseconds(); // double comm_time = -myseconds(); // int numimagesfound [nsets]; - memset(&numimagesfound, 0, nsets*sizeof(int)); struct point imagesposition [nsets][MAXIMPERSOURCE]; - memset(&imagesposition, 0, nsets*MAXIMPERSOURCE*sizeof(point)); // - int numimagesfound_tmp[nsets]; - struct point imagesposition_tmp[nsets][MAXIMPERSOURCE]; - // - memset(numimagesfound_tmp, 0, nsets*sizeof(int)); - memset(imagesposition_tmp, 0, nsets*sizeof(int)); + memset(&numimagesfound, 0, nsets*sizeof(int)); + memset(&imagesposition, 0, nsets*MAXIMPERSOURCE*sizeof(point)); // -#ifdef __WITH_MPI - int total = 0; + delense_comm(&numimagesfound[0], &imagesposition[0][0], &numimg, nimages_strongLensing, &locimagesfound[0], &image_pos[0][0], nsets, world_rank, world_size); // - if (!verbose) - { - MPI_CHECK(MPI_Send( &numimg, 1, MPI_INT , 0, 666 + world_rank, MPI_COMM_WORLD )); - if (numimg != 0) - { - MPI_CHECK(MPI_Send( &locimagesfound, nsets , MPI_INT , 0, 666 + world_rank, MPI_COMM_WORLD )); - MPI_CHECK(MPI_Send( &image_pos, nsets*MAXIMPERSOURCE*2, MPI_DOUBLE, 0, 667 + world_rank, MPI_COMM_WORLD )); - } - } - // - if (verbose) - { - int image_sum = 0; - // - for (int ipe = 0; ipe < world_size; ++ipe) - { - MPI_Status status; - // - if (ipe == 0) - { - memcpy(&numimagesfound_tmp, &locimagesfound, nsets*sizeof(int)); - memcpy(&imagesposition_tmp, &image_pos, nsets*MAXIMPERSOURCE*sizeof(point)); - } - else - { - MPI_CHECK(MPI_Recv(&numimg , 1 , MPI_INT , ipe, 666 + ipe, MPI_COMM_WORLD, &status)); - if (numimg != 0) - { - MPI_CHECK(MPI_Recv(&numimagesfound_tmp, nsets , MPI_INT , ipe, 666 + ipe, MPI_COMM_WORLD, &status)); - MPI_CHECK(MPI_Recv(&imagesposition_tmp, nsets*MAXIMPERSOURCE*2, MPI_DOUBLE, ipe, 667 + ipe, MPI_COMM_WORLD, &status)); - } - } - // - //MPI_Reduce(&imagesfound_tmp, &total, ipe, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - // - if (numimg != 0) - { - //for (int jj = 0; jj < runmode->nimagestot; ++jj) - //{ - int loc_numimg = 0; - int loc_numimg_s = 0; - for (int ii = 0; ii < nsets; ++ii) - { - //int img_len = numimagesfound[ii][jj]; - int img_len = numimagesfound_tmp[ii]; - image_sum += img_len; - if (img_len != 0) - { - int loc_length = numimagesfound[ii]; - memcpy(&imagesposition[ii][loc_length], &imagesposition_tmp[ii], img_len*sizeof(point)); - numimagesfound[ii] += img_len; - loc_numimg += img_len; - loc_numimg_s += nimages_strongLensing[ii]; - //printf("%d: %d , img_len = %d, source = %d, sum = %d %d\n", ipe, ii, numimagesfound_tmp[ii], nimages_strongLensing[ii], loc_numimg, loc_numimg_s); - - } - } - } - } - printf(" Total number of images found = %d\n", numimg); - } - MPI_Barrier(MPI_COMM_WORLD); comm_time += myseconds(); -#endif // // image extraction to compute // - //Updated up to here!!!!!!!!!!!!!! - // chi2_time = -myseconds(); - index = 0; - // if (verbose) - for( int source_id = 0; source_id < nsets; source_id ++) - { - unsigned short int nimages = nimages_strongLensing[source_id]; - // - //______________________Initialisation______________________ - // - for (unsigned short int i = 0; i < nimages; ++i) - nimagesfound[source_id][i] = 0; - // - //for( unsigned short int image_id = 0; image_id < nimages; image_id++) - //{ -//#endif //collapsing - // - double image_dist[MAXIMPERSOURCE]; - // - //printf(" Image %d, number of sources found %d\n", image_id, loc_images_found); - // - struct point image_position; - int image_index; - // - for (int ii = 0; ii < /*loc*/numimagesfound[source_id]; ++ii) - { - // - // - int image_index = 0; - // - image_position = imagesposition[source_id][ii]; - image_dist[0] = mychi_dist(image_position, images[index + 0].center); // get the distance to the real image - for(int i = 1; i < nimages_strongLensing[source_id]; i++) - { // get the distance to each real image and keep the index of the closest real image - - image_dist[i] = mychi_dist(image_position, images[index + i].center); - if (image_dist[i] < image_dist[image_index]) - { - image_index = i; - } - } - // - // we should exit loops here - // - // p1_time += myseconds(); - // - int skip_image = 0; - // Sometimes due to the numerical errors at the centerpoint, - // for SIE potentials an additional image will appear at the center of the Potential. - // This is due to the fact that it is not possible to simulate an infinity value - // at the center correctly, Check that sis correspond to Nlens[0] - /* - for (int iterator = 0; iterator < runmode->Nlens[0]; ++iterator) - { - if ( fabs(image_position.x - lens[0].position_x[iterator]) <= dx/2. and fabs(image_position.y - lens[0].position_y[iterator]) <= dx/2.) - { - skip_image = 1; - printf("WARNING: You are using SIE potentials. An image to close to one of the potential centers has been classified as numerical error and removed \n"); - } - }*/ - //printf("%d %d %d %d\n", x_id, y_id,thread_found_image, skip_image); - if (!skip_image) - { - //#pragma omp atomic - images_found++; - struct point temp; - //printf(" source %d, image %d, index %d, Images found: %d\n", source_id, image_id, image_index, nimagesfound[source_id][image_id][image_index]); - //checking whether a closest image has already been found - if (nimagesfound[source_id][image_index] == 0) - { // if no image found up to now - - //image position is allocated to theoretical image - //#pragma omp critical - tim[image_index] = image_position; - //#pragma omp atomic - nimagesfound[source_id][image_index]++; - } - else if (nimagesfound[source_id][image_index] > 0) - { // if we have already found an image - // If the new image we found is closer than the previous image - //printf(" tim2: %f %f\n", image_dist[image_index], mychi_dist(images[index + image_index].center, tim[image_id][image_index])); - if (image_dist[image_index] < mychi_dist(images[index + image_index].center, tim[image_index])) - { - temp = tim[image_index]; // we store the position of the old image in temp - //#pragma omp critical - tim[image_index] = image_position; // we link the observed image with the image we just found - //printf("tim2 %d %d = %f %f\n", image_id, image_index, image_position.x, image_position.y); - } - else - { - temp = image_position; // we store the position of the image we just found in temp - } - // initialising second_closest_id to the highest value - // Loop over all images in the set except the closest one - // and initialize to the furthest away image - int second_closest_id = 0; - for (int i = 1; i < nimages_strongLensing[source_id] && (i != image_index); i++) - { - if(image_dist[i] > image_dist[second_closest_id]) second_closest_id=i; - } - /////////////////////////////////////////////////////////////// - // Loop over all images in the set that are not yet allocated to a theoretical image - // and allocate the closest one - // we search for an observed image not already linked (nimagesfound=0) - for(int i = 0; i < nimages_strongLensing[source_id] && nimagesfound[source_id][i] == 0; i++) - { - if(image_dist[i] < image_dist[second_closest_id]) - { - second_closest_id = i; - // im_index value changes only if we found a not linked yet image - image_index = i; - //printf("tim3 %d %d = %f %f\n", image_id, image_index, temp.x, temp.y); - //#pragma omp critical - tim[image_index] = temp; // if we found an observed and not already linked image, we allocate the theoretical image temp - } - } - // increasing the total number of images found (If we find more than 1 theoretical image linked to 1 real image, - // these theoretical - //#pragma omp atomic - nimagesfound[source_id][image_index]++; - // images are included in this number) - } - } - //#pragma omp atomic - //loc_images_found++; - //thread_found_image = 0; // for next iteration - } - // - //} - //#pragma omp barrier - //____________________________ end of image loop - // - //____________________________ computing the local chi square - // - double chiimage; - // - int _nimages = nimages_strongLensing[source_id]; - // - for( int iter = 0; iter < _nimages; iter++) - { - int i = iter; - //int i=iter/nimages_strongLensing[source_id]; - //int j=iter%nimages_strongLensing[source_id]; - - //printf("nimagesfound %d %d = %d\n", i, j, nimagesfound[i][j]); - //if(i != j) - //{ - if(nimagesfound[source_id][i] > 0) - { - double pow1 = images[index + i].center.x - tim[i].x; - double pow2 = images[index + i].center.y - tim[i].y; - // - //chiimage = pow(images[index + j].center.x - tim[i][j].x, 2) + pow(images[index + j].center.y - tim[i][j].y, 2); // compute the chi2 - chiimage = pow1*pow1 + pow2*pow2; // compute the chi2 - //printf("%d %d = %.15f\n", i, j, chiimage); - *chi += chiimage; - } - else - if(nimagesfound[source_id][i] == 0) - { - // If we do not find a correpsonding image, we add a big value to the chi2 to disfavor the model - *chi += 1000.*nimages_strongLensing[source_id]; - printf(" source_id = %d, image number = %d has not found an image\n", source_id, i); - } - //} - } - // - //____________________________ end of computing the local chi square - // - //printf("%d: chi = %.15f\n", source_id, *chi); - /* - for (int i=0; i < nimages_strongLensing[source_id]; ++i){ - for (int j=0; j < nimages_strongLensing[source_id]; ++j){ - printf(" %d",nimagesfound[i][j]); - } - printf("\n"); - }*/ + chi_computation(chi, &images_found, &numimagesfound[0], &imagesposition[0][0], nimages_strongLensing, images, nsets); - //Incrementing Index: Images already treated by previous source_id - index += nimages_strongLensing[source_id]; - } -#ifdef __WITH_MPI +#ifdef __WITH_GPU MPI_Barrier(MPI_COMM_WORLD); #endif // chi2_time += myseconds(); time += myseconds(); // if (verbose) { // // int nthreads = 1; // //#pragma omp parallel // nthreads = omp_get_num_threads(); // printf(" overall time = %f s.\n", time); printf(" - grad time = %f s.\n", grad_time); printf(" - image time = %f s.\n", image_time); printf(" - delense time = %f s.\n", delense_time); printf(" - comm time = %f s.\n", comm_time); printf(" - chi2 time = %f s.\n", chi2_time); // printf(" images found: %d out of %ld\n", images_found, images_total); } // #ifdef __WITH_GPU cudaFree(grid_gradient_x); cudaFree(grid_gradient_y); #else free(grid_gradient_x); free(grid_gradient_y); #endif }