diff --git a/Benchmarks/ChiBenchmark/chi_CPU.cpp b/Benchmarks/ChiBenchmark/chi_CPU.cpp deleted file mode 100644 index 9cf0885..0000000 --- a/Benchmarks/ChiBenchmark/chi_CPU.cpp +++ /dev/null @@ -1,528 +0,0 @@ -#include -#include -//#include -#include -#include -#include -#include -#include -#include -// -#ifndef __xlC__ -#warning "gnu compilers" -#include -#endif -// -//#include "simd_math.h" -#include "chi_CPU.hpp" -#ifdef __WITH_GPU -#include "cudafunctions.cuh" -#include "grid_gradient_GPU.cuh" -#endif -////#endif -//#include "gradient_GPU.cuh" -#ifdef _OPENMP -#include -#endif -#ifdef __WITH_MPI -#warning "MPI enabled" -#include -#include "mpi_check.h" -#endif -#include "delense_CPU_utils.hpp" -#include "delense_CPU.hpp" -// -#define MIN(a,b) (((a)<(b))?(a):(b)) -#define MAX(a,b) (((a)>(b))?(a):(b)) -// -extern -double myseconds(); -#if 0 -{ - struct timeval tp; - struct timezone tzp; - int i; - - i = gettimeofday(&tp,&tzp); - return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); -} -#endif -// -void mychi_bruteforce_SOA_CPU_grid_gradient(double *chi, int *error, runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, galaxy *images) -{ - // - //double dx, dy; //, x_pos, y_pos; //pixelsize - // - // double im_dist[MAXIMPERSOURCE]; // distance to a real image for an "theoretical" image found from a theoretical source - // int im_index; // index of the closest real image for an image found from a theoretical source - // int second_closest_id; // index of the second closest real image for an image found from a theoretical source - // int thread_found_image = 0; // at each iteration in the lens plane, turn to 1 whether the thread find an image - // struct point im_position, temp; // position of the image found from a theoretical source + temp variable for comparison - // struct triplet Tsup, Tinf, Tsupsource, Tinfsource;// triangles for the computation of the images created by the theoretical sources - // - unsigned int nsets = runmode->nsets; - unsigned int nimagestot = runmode->nimagestot; - // - struct galaxy sources [nsets][nimagestot]; // theoretical sources (common for a set) - int nimagesfound [nsets][nimagestot][MAXIMPERSOURCE]; // number of images found from the theoretical sources - int locimagesfound [nsets][nimagestot]; - struct point image_pos [nsets][nimagestot][MAXIMPERSOURCE]; - //double image_dist [nsets][nimagestot][MAXIMPERSOURCE]; - struct point tim [nimagestot][MAXIMPERSOURCE]; // theoretical images (computed from sources) - // - int world_size = 1; - int world_rank = 0; -#ifdef __WITH_MPI - MPI_Comm_size(MPI_COMM_WORLD, &world_size); - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - MPI_Barrier(MPI_COMM_WORLD); -#endif - unsigned int verbose = (world_rank == 0); - // - int grid_size = runmode->nbgridcells; - int loc_grid_size = runmode->nbgridcells/world_size; - // - double y_len = fabs(frame->ymax - frame->ymin); - int y_len_loc = runmode->nbgridcells/world_size; - int y_pos_loc = (int) world_rank*y_len_loc; - int y_bound = y_len_loc; - if ((world_rank + 1) != world_size) y_bound++; - // - // - const double dx = (frame->xmax - frame->xmin)/(runmode->nbgridcells - 1); - const double dy = (frame->ymax - frame->ymin)/(runmode->nbgridcells - 1); - // - double *grid_gradient_x, *grid_gradient_y; - // - //grid_gradient_x = (double *)malloc((int) grid_size*loc_grid_size*sizeof(double)); - //grid_gradient_y = (double *)malloc((int) grid_size*loc_grid_size*sizeof(double)); -#ifdef __WITH_GPU - cudasafe(cudaMallocHost(&grid_gradient_x, grid_size*y_bound*sizeof(double)), "mychi_bruteforce_SOA_CPU_grid_gradient: allocating grid_gradient_x"); - cudasafe(cudaMallocHost(&grid_gradient_y, grid_size*y_bound*sizeof(double)), "mychi_bruteforce_SOA_CPU_grid_gradient: allocating grid_gradient_x"); -#else - grid_gradient_x = (double *)malloc((int) grid_size*y_bound*sizeof(double)); - grid_gradient_y = (double *)malloc((int) grid_size*y_bound*sizeof(double)); -#endif - // - //uais - //if (verbose) printf("@@%d: nsets = %d nimagestot = %d maximgpersource = %d, grid_size = %d, loc_grid_size = %d, y_pos_loc = %d\n", world_rank, nsets, nimagestot, MAXIMPERSOURCE, grid_size, loc_grid_size, y_pos_loc); - // - const int grid_dim = runmode->nbgridcells; - //Packaging the image to sourceplane conversion - double time = -myseconds(); - double grad_time = -myseconds(); - //gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_dim); - int zero = 0; -#ifdef __WITH_GPU -#warning "Using GPUs..." -#ifdef __WITH_UM -#warning "Chi computation using UNIFIED MEMORY" - //gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, zero, y_pos_loc); - gradient_grid_GPU_UM(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, zero, y_pos_loc); -#else - gradient_grid_GPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, zero, y_pos_loc); -#endif - cudasafe(cudaGetLastError(), "after gradient_grid_GPU"); - //gradient_grid_GPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, grid_size); -#else - gradient_grid_CPU(grid_gradient_x, grid_gradient_y, frame, lens, runmode->nhalos, dx, dy, grid_size, y_bound, zero, y_pos_loc); -#endif - // -#ifdef __WITH_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif - grad_time += myseconds(); - // - int index = 0; // index tracks the image within the total image array in the image plane - *chi = 0; - // - double chi2_time = 0.; - double delense_time = 0.; - double image_time = 0.; - // - int images_found = 0; - // - long int images_total = 0; - // - printf("@@nsets = %d nx = %d ny = %d, xmin = %f, dx = %f, ymin = %f, dy = %f\n", nsets, runmode->nbgridcells, runmode->nbgridcells, frame->xmin, dx, frame->ymin, dy ); - // - int numsets = 0; - // - for( int source_id = 0; source_id < nsets; source_id ++) - numsets += nimages_strongLensing[source_id]; - printf("@@Total numsets = %d\n", numsets); - // - // - // nsets : number of images in the source plane - // nimagestot: number of images in the image plane - // - image_time -= myseconds(); - // - for( int source_id = 0; source_id < nsets; source_id ++) - { - // number of images in the image plane for the specific image (1,3,5...) - unsigned short int nimages = nimages_strongLensing[source_id]; - //@@printf("@@ source_id = %d, nimages = %d\n", source_id, nimages_strongLensing[source_id]); - //____________________________ image (constrains) loop ________________________________ - for(unsigned short int image_id = 0; image_id < nimages; image_id++) - { - //printf("@@ nimages = %d\n", nimages_strongLensing[source_id]); - //________ computation of theoretical sources _________ - // output: sources[source_id].center - //printf("Image = %f %f\n", images[index + image_id].center.x, images[index + image_id].center.y); - mychi_transformImageToSourcePlane_SOA(runmode->nhalos, - &images[index + image_id].center, - images[index + image_id].dr, - lens, - &sources[source_id][image_id].center); - // - // void mychi_transformImageToSourcePlane_SOA(const int Nlens, const struct point *image_point, double dlsds, const struct Potential_SOA *lens, struct point *source_point) - // - struct point Grad = module_potentialDerivatives_totalGradient_SOA(&images[index + image_id].center, lens, runmode->nhalos); - //printf(" image %d, %d (%d) = (%.15f, %.15f) -> (%.15f, %.15f)\n", source_id, image_id, nimages, images[index + image_id].center.x, images[index + image_id].center.y, sources[source_id][image_id].center.x, sources[source_id][image_id].center.y ); - //printf("Grad.x = %f, %f\n", Grad.x, grid_gradient_x[images[index + image_id].center.x/dx]); - // - // find the position of the constrain in the source plane - sources[source_id][image_id].center.x = images[index + image_id].center.x - images[index + image_id].dr*Grad.x; - sources[source_id][image_id].center.y = images[index + image_id].center.y - images[index + image_id].dr*Grad.y; - // - //time += myseconds(); - // - sources[source_id][image_id].redshift = images[index + image_id].redshift; - // - sources[source_id][image_id].dr = images[index + image_id].dr; - sources[source_id][image_id].dls = images[index + image_id].dls; - sources[source_id][image_id].dos = images[index + image_id].dos; -//#if 1 - } - index += nimages; - } -#ifdef __WITH_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif - image_time += myseconds(); - // - // main loop - // - //double y_len = fabs(frame->ymax - frame->ymin); - //int y_len_loc = runmode->nbgridcells/world_size; - //int y_pos_loc = (int) world_rank*y_len_loc; - //printf("%d out of %d: y_id = %d to %d\n", world_rank, world_size, y_pos_loc, y_pos_loc + y_len_loc - 1); - //fflush(stdout); - //MPI_Barrier(MPI_COMM_WORLD); - // - delense_time -= myseconds(); - index = 0; - int numimg = 0; - // - delense(/*nsets, nimagestot,*/ &image_pos[0][0][0], &locimagesfound[0][0], &numimg, runmode, lens, frame, nimages_strongLensing, &sources[0][0], grid_gradient_x, grid_gradient_y); - printf("%d: numimg = %d\n", world_rank, numimg); -// -// local delensing done, moving on to the reduction -// -#ifdef __WITH_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif - delense_time += myseconds(); - // - double comm_time = -myseconds(); - // - int numimagesfound [nsets][nimagestot]; - memset(&numimagesfound, 0, nsets*nimagestot*sizeof(int)); - struct point imagesposition [nsets][nimagestot][MAXIMPERSOURCE]; - memset(&imagesposition, 0, nsets*nimagestot*MAXIMPERSOURCE*sizeof(point)); - // - int numimagesfound_tmp[nsets][nimagestot]; - struct point imagesposition_tmp[nsets][nimagestot][MAXIMPERSOURCE]; - // - memset(numimagesfound_tmp, 0, nsets*nimagestot*sizeof(int)); - memset(imagesposition_tmp, 0, nsets*nimagestot*sizeof(int)); - // -#ifdef __WITH_MPI - int total = 0; - //MPI_Reduce(&locimagesfound, &imagesfound, nsets*nimagestot, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - //MPI_Reduce(&locimagesfound, &imagesfound, nsets*nimagestot, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - // - if (!verbose) - { - MPI_CHECK(MPI_Send( &numimg , 1 , MPI_INT , 0, 666 + world_rank, MPI_COMM_WORLD )); - if (numimg != 0) - { - MPI_CHECK(MPI_Send( &locimagesfound, nsets*nimagestot , MPI_INT , 0, 666 + world_rank, MPI_COMM_WORLD )); - //MPI_CHECK(MPI_Send( &image_pos, nsets*nimagestot*MAXIMPERSOURCE, MPI_points, 0, 667 + world_rank, MPI_COMM_WORLD )); - MPI_CHECK(MPI_Send( &image_pos, nsets*nimagestot*MAXIMPERSOURCE*2, MPI_DOUBLE, 0, 667 + world_rank, MPI_COMM_WORLD )); - } - } - // - if (verbose) - { - int image_sum = 0; - // - for (int ipe = 0; ipe < world_size; ++ipe) - { - MPI_Status status; - // - if (ipe == 0) - { - memcpy(&numimagesfound_tmp, &locimagesfound, nsets*nimagestot*sizeof(int)); - memcpy(&imagesposition_tmp, &image_pos, nsets*nimagestot*MAXIMPERSOURCE*sizeof(point)); - } - else - { - MPI_CHECK(MPI_Recv(&numimg , 1 , MPI_INT , ipe, 666 + ipe, MPI_COMM_WORLD, &status)); - if (numimg != 0) - { - MPI_CHECK(MPI_Recv(&numimagesfound_tmp, nsets*nimagestot , MPI_INT , ipe, 666 + ipe, MPI_COMM_WORLD, &status)); - MPI_CHECK(MPI_Recv(&imagesposition_tmp, nsets*nimagestot*MAXIMPERSOURCE*2, MPI_DOUBLE, ipe, 667 + ipe, MPI_COMM_WORLD, &status)); - } - } - // - //MPI_Reduce(&imagesfound_tmp, &total, ipe, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - // - if (numimg != 0) - for (int jj = 0; jj < nimagestot; ++jj) - { - for (int ii = 0; ii < nsets; ++ii) - { - //int img_len = numimagesfound[ii][jj]; - int img_len = numimagesfound_tmp[ii][jj]; - //printf("%d: %d %d, img_len = %d\n", ipe, ii, jj, img_len); - image_sum += img_len; - if (img_len != 0) - { - //int loc_length = numimagesfound[ii][jj]; - int loc_length = numimagesfound[ii][jj]; - memcpy(&imagesposition[ii][jj][loc_length], &imagesposition_tmp[ii][jj], img_len*sizeof(point)); - numimagesfound[ii][jj] += img_len; - numimg += img_len; - - } - } - } - } - } - //MPI_Barrier(MPI_COMM_WORLD); - comm_time += myseconds(); -#endif - // - // image extraction to compute - // - chi2_time = -myseconds(); - index = 0; - // - if (verbose) - for( int source_id = 0; source_id < nsets; source_id ++) - { - unsigned short int nimages = nimages_strongLensing[source_id]; - // - //______________________Initialisation______________________ - // - for (unsigned short int i = 0; i < nimages; ++i) - for (unsigned short int j = 0; j < nimages; ++j) - nimagesfound[source_id][i][j] = 0; - // - for( unsigned short int image_id = 0; image_id < nimages; image_id++) - { -//#endif - // - double image_dist[MAXIMPERSOURCE]; - // - //printf(" Image %d, number of sources found %d\n", image_id, loc_images_found); - // - struct point image_position; - int image_index; - // - for (int ii = 0; ii < /*loc*/numimagesfound[source_id][image_id]; ++ii) - { - // - // - int image_index = 0; - // - image_position = imagesposition[source_id][image_id][ii]; - image_dist[0] = mychi_dist(image_position, images[index + 0].center); // get the distance to the real image - for(int i = 1; i < nimages_strongLensing[source_id]; i++) - { // get the distance to each real image and keep the index of the closest real image - - image_dist[i] = mychi_dist(image_position, images[index + i].center); - //printf(" *** image %d = %f %f, distance = %f\n", index + i, images[index + i].center.x, images[index + i].center.y, image_dist[i]); - if (image_dist[i] < image_dist[image_index]) - { - image_index = i; - } - } - // - // we should exit loops here - // - // p1_time += myseconds(); - // - int skip_image = 0; - // Sometimes due to the numerical errors at the centerpoint, - // for SIE potentials an additional image will appear at the center of the Potential. - // This is due to the fact that it is not possible to simulate an infinity value - // at the center correctly, Check that sis correspond to Nlens[0] - /* - for (int iterator = 0; iterator < runmode->Nlens[0]; ++iterator) - { - if ( fabs(image_position.x - lens[0].position_x[iterator]) <= dx/2. and fabs(image_position.y - lens[0].position_y[iterator]) <= dx/2.) - { - skip_image = 1; - printf("WARNING: You are using SIE potentials. An image to close to one of the potential centers has been classified as numerical error and removed \n"); - } - } - */ - //printf("%d %d %d %d\n", x_id, y_id,thread_found_image, skip_image); - if (!skip_image) - { - //#pragma omp atomic - images_found++; - struct point temp; - //printf(" source %d, image %d, index %d, Images found: %d\n", source_id, image_id, image_index, nimagesfound[source_id][image_id][image_index]); - //checking whether a closest image has already been found - if (nimagesfound[source_id][image_id][image_index] == 0) - { // if no image found up to now - - //image position is allocated to theoretical image - //#pragma omp critical - tim[image_id][image_index] = image_position; - //#pragma omp atomic - nimagesfound[source_id][image_id][image_index]++; - } - else if (nimagesfound[source_id][image_id][image_index] > 0) - { // if we have already found an image - // If the new image we found is closer than the previous image - //printf(" tim2: %f %f\n", image_dist[image_index], mychi_dist(images[index + image_index].center, tim[image_id][image_index])); - if (image_dist[image_index] < mychi_dist(images[index + image_index].center, tim[image_id][image_index])) - { - temp = tim[image_id][image_index]; // we store the position of the old image in temp - //#pragma omp critical - tim[image_id][image_index] = image_position; // we link the observed image with the image we just found - //printf("tim2 %d %d = %f %f\n", image_id, image_index, image_position.x, image_position.y); - } - else - { - temp = image_position; // we store the position of the image we just found in temp - } - // initialising second_closest_id to the highest value - // Loop over all images in the set except the closest one - // and initialize to the furthest away image - int second_closest_id = 0; - for (int i = 1; i < nimages_strongLensing[source_id] && (i != image_index); i++) - { - if(image_dist[i] > image_dist[second_closest_id]) second_closest_id=i; - } - /////////////////////////////////////////////////////////////// - // Loop over all images in the set that are not yet allocated to a theoretical image - // and allocate the closest one - // we search for an observed image not already linked (nimagesfound=0) - for(int i = 0; i < nimages_strongLensing[source_id] && nimagesfound[source_id][image_id][i] == 0; i++) - { - if(image_dist[i] < image_dist[second_closest_id]) - { - second_closest_id = i; - // im_index value changes only if we found a not linked yet image - image_index = i; - //printf("tim3 %d %d = %f %f\n", image_id, image_index, temp.x, temp.y); - //#pragma omp critical - tim[image_id][image_index] = temp; // if we found an observed and not already linked image, we allocate the theoretical image temp - } - } - // increasing the total number of images found (If we find more than 1 theoretical image linked to 1 real image, - // these theoretical - //#pragma omp atomic - nimagesfound[source_id][image_id][image_index]++; - // images are included in this number) - } - } - //#pragma omp atomic - //loc_images_found++; - //thread_found_image = 0; // for next iteration - } - // - } - //#pragma omp barrier - //____________________________ end of image loop - // - //____________________________ computing the local chi square - // - double chiimage; - // - int _nimages = nimages_strongLensing[source_id]; - // - for( int iter = 0; iter < _nimages*_nimages; iter++) - { - int i=iter/nimages_strongLensing[source_id]; - int j=iter%nimages_strongLensing[source_id]; - - //printf("nimagesfound %d %d = %d\n", i, j, nimagesfound[i][j]); - if(i != j) - { - // In the current method, we get the source in the source plane by ray tracing image in nimagesfound[i][i]. If we ray trace back, - // we arrive again at the same position and thus the chi2 from it is 0. Thus we do not calculate the chi2 (-> if i!=j) - if(nimagesfound[source_id][i][j] > 0) - { - double pow1 = images[index + j].center.x - tim[i][j].x; - double pow2 = images[index + j].center.y - tim[i][j].y; - // - //chiimage = pow(images[index + j].center.x - tim[i][j].x, 2) + pow(images[index + j].center.y - tim[i][j].y, 2); // compute the chi2 - chiimage = pow1*pow1 + pow2*pow2; // compute the chi2 - //printf("%d %d = %.15f\n", i, j, chiimage); - *chi += chiimage; - } - else - if(nimagesfound[source_id][i][j] == 0) - { - // If we do not find a correpsonding image, we add a big value to the chi2 to disfavor the model - *chi += 100.*nimages_strongLensing[source_id]; - } - } - } - // - //____________________________ end of computing the local chi square - // - //printf("%d: chi = %.15f\n", source_id, *chi); - /* - for (int i=0; i < nimages_strongLensing[source_id]; ++i){ - for (int j=0; j < nimages_strongLensing[source_id]; ++j){ - printf(" %d",nimagesfound[i][j]); - } - printf("\n"); - }*/ - - //Incrementing Index: Images already treated by previous source_id - index += nimages_strongLensing[source_id]; - } - MPI_Barrier(MPI_COMM_WORLD); - // - chi2_time += myseconds(); - time += myseconds(); - // - if (verbose) - { - // - // int nthreads = 1; - // - //#pragma omp parallel - // nthreads = omp_get_num_threads(); - // - printf(" overall time = %f s.\n", time); - printf(" - grad time = %f s.\n", grad_time); - printf(" - image time = %f s.\n", image_time); - printf(" - delense time = %f s.\n", delense_time); - printf(" - comm time = %f s.\n", comm_time); - printf(" - chi2 time = %f s.\n", chi2_time); - // - printf(" images found: %d out of %ld\n", images_found, images_total); - } - // -#ifdef __WITH_GPU -#ifndef __WITH_UM - cudasafe(cudaFree(grid_gradient_x), "deallocating grid_gradient_x"); - cudasafe(cudaFree(grid_gradient_y), "deallocating grid_gradient_y"); -#endif -#else - free(grid_gradient_x); - free(grid_gradient_y); -#endif -} - - diff --git a/Benchmarks/ChiBenchmark/chi_CPU.hpp b/Benchmarks/ChiBenchmark/chi_CPU.hpp deleted file mode 100644 index 3e7d6de..0000000 --- a/Benchmarks/ChiBenchmark/chi_CPU.hpp +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once -#ifndef __CHI_CPU_HPP__ -#define __CHI_CPU_HPP__ - -#include -#include -#include -#include -#ifdef __AVX512F__ -#include "gradient_avx512f.hpp" -#endif - -void mychi_bruteforce_SOA_CPU_grid_gradient(double *chi, int *error, runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, galaxy *images); -void mychi_bruteforce_SOA_CPU_grid_gradient_orig(double *chi, int *error, runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, galaxy *images); -void mychi_bruteforce_SOA_CPU_grid_gradient_barycentersource(double *chi, int *error, runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, galaxy *images); - -void mychi_transformImageToSourcePlane(const runmode_param *runmode, const struct point *image_point, double dlsds, const struct Potential *lens, struct point *source_point); -void mychi_transformImageToSourcePlane_SOA(const int Nlens, const struct point *image_point, double dlsds, const struct Potential_SOA *lens, struct point *source_point); -void mychi_transformImageToSourcePlane_SOA_AVX(const int *Nlens, const struct point *image_point, double dlsds, const struct Potential_SOA *lens, struct point *source_point); -void mychi_transformImageToSourcePlane_SOA_Packed(const int *Nlens, const struct point *image_point, double dlsds, struct point *source_point, double *grad_x, double * grad_y, int grad_id); - -void mychi_transformtriangleImageToSourcePlane(const runmode_param *runmode, struct triplet *I, double dlsds, const struct Potential *lens, struct triplet *S); -void mychi_transformtriangleImageToSourcePlane_SOA( struct triplet *I, double dlsds, const struct Potential_SOA *lens, struct triplet *S); -void mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_upper( struct triplet *I, double dlsds, struct triplet *S, double *grad_x, double * grad_y, int grad_id, int nbgridcell); -void mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_lower( struct triplet *I, double dlsds, struct triplet *S, double *grad_x, double * grad_y, int grad_id, int nbgridcell); - -double mychi_determinant(const struct point *A,const struct point *B,const struct point *C); -int mychi_inside(const struct point *P, struct triplet *T); -int mychi_insideborder(const struct point *P, struct triplet *T); -struct point mychi_barycenter(struct triplet *A); -double mychi_dist(struct point A, struct point B); -#endif diff --git a/Benchmarks/ChiBenchmark/delense_CPU.cpp b/Benchmarks/ChiBenchmark/delense_CPU.cpp deleted file mode 100644 index fce63b7..0000000 --- a/Benchmarks/ChiBenchmark/delense_CPU.cpp +++ /dev/null @@ -1,266 +0,0 @@ -// This file is part of lenstoolHPC -// authors: gilles.fourestey@epfl.ch - -#include "delense_CPU.hpp" -#if 1 -void delense_barycenter(struct point* image_pos, int* locimagesfound, int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy* sources, double* grid_gradient_x, double* grid_gradient_y) -{ -#define INDEX2D_BAR(y, x) (MAXIMPERSOURCE*y + x) - //const unsigned int nimagestot = runmode->nimagestot; - const unsigned int nsets = runmode->nsets; - const unsigned int nbgridcells = runmode->nbgridcells; - int world_size = 1; - int world_rank = 0; -#ifdef __WITH_MPI - MPI_Comm_size(MPI_COMM_WORLD, &world_size); - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); -#endif - unsigned int verbose = (world_rank == 0); - // - const int grid_dim = runmode->nbgridcells; - const int grid_size = nbgridcells; - const int loc_grid_size = nbgridcells/world_size; - // - double y_len = fabs(frame->ymax - frame->ymin); - int y_len_loc = nbgridcells/world_size; - int y_pos_loc = (int) world_rank*y_len_loc; - int y_bound = y_len_loc; - // - if ((world_rank + 1) != world_size) y_bound++; - // - const double dx = (frame->xmax - frame->xmin)/(nbgridcells - 1); - const double dy = (frame->ymax - frame->ymin)/(nbgridcells - 1); - // - int images_total = 0; - int index = 0; - // - int lif[nsets][16]; - memset(&lif,0, nsets*16*sizeof(int)); - // - //locimagesfound[source_id] = 0; - //lif[source_id] = 0; - // number of images in the image plane for the specific image (1,3,5...) - //unsigned short int nimages = nimages_strongLensing[source_id]; - //printf("@@ source_id = %d, nimages = %d\n", source_id, nimages_strongLensing[source_id]); - //____________________________ image (constrains) loop ________________________________ - // - //struct point image_pos [MAXIMPERSOURCE]; - // - //MPI_Barrier(MPI_COMM_WORLD); - //if (verbose) printf("source = %d, image = %d\n", source_id, image_id); - //if (verbose) fflush(stdout); - for( int source_id = 0; source_id < nsets; source_id ++) - { - int loc_images_found = 0; - - // MPI_Barrier(MPI_COMM_WORLD); - //#endif -#pragma omp parallel -#pragma omp for reduction(+: images_total) - for (int y_id = 0; y_id < (y_bound - 1); ++y_id) - { - for (int x_id = 0; x_id < runmode->nbgridcells - 1 ; ++x_id) - { - images_total++; - // - double x_pos = frame->xmin + ( x_id)*dx; - double y_pos = frame->ymin + (y_pos_loc + y_id)*dy; - // - // Define the upper + lower triangle, both together = square = pixel - // - struct triplet Tsup, Tinf; - // - Tsup.a.x = x_pos; - Tsup.b.x = x_pos; - Tsup.c.x = x_pos + dx; - Tinf.a.x = x_pos + dx; - Tinf.b.x = x_pos + dx; - Tinf.c.x = x_pos; - // - Tsup.a.y = y_pos; - Tsup.b.y = y_pos + dy; - Tsup.c.y = y_pos; - Tinf.a.y = y_pos + dy; - Tinf.b.y = y_pos; - Tinf.c.y = y_pos + dy; - // - // Lens to Sourceplane conversion of triangles - // - // double time = -myseconds(); - // - struct triplet Timage; - struct triplet Tsource; - // - mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_upper(&Tsup, sources[source_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, y_id*grid_dim + x_id, grid_dim); - // - int thread_found_image = 0; - // - if (mychi_insideborder(&sources[source_id].center, &Tsource) == 1) - { - thread_found_image = 1; - Timage = Tsup; - } - else - { - mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_lower(&Tinf, sources[source_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, y_id*grid_dim + x_id, grid_dim); - if (mychi_inside(&sources[source_id].center, &Tsource) == 1) - { - thread_found_image = 1; - Timage = Tinf; - } - } -#if 1 - if (thread_found_image) - { -#pragma omp critical - { - // get the barycenter of the triangle - image_pos[INDEX2D_BAR(source_id, loc_images_found)] = mychi_barycenter(&Timage); - //locimagesfound[source_id]++; - loc_images_found++; - *numimg = *numimg + 1; - //locimagesfound[source_id]++; - lif[source_id][0]++; - //loc_images_found++; - //*numimg = *numimg + 1; - } - } -#endif - } - } - index += nimages_strongLensing[source_id]; - - - } -#pragma omp parallel for - for (int ii = 0; ii < nsets; ++ii) - locimagesfound[ii] = lif[ii][0]; -} -#endif - -void delense(struct point* image_pos, int* locimagesfound, int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy* sources, double* grid_gradient_x, double* grid_gradient_y) -{ - const unsigned int nsets = runmode->nsets; - const unsigned int nimagestot = runmode->nimagestot; -#define INDEX2D(y, x) (nimagestot*y + x) -#define INDEX3D(y, x, z) (nimagestot*MAXIMPERSOURCE*y + MAXIMPERSOURCE*x + z) - - //const unsigned int nsets = runmode->nsets; - //const unsigned int nimagestot = runmode->nimagestot; - const unsigned int nbgridcells = runmode->nbgridcells; - //printf("nsets = %d, nimagestot = %d\n", nsets, nimagestot); - int world_size = 1; - int world_rank = 0; -#ifdef __WITH_MPI - MPI_Comm_size(MPI_COMM_WORLD, &world_size); - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - MPI_Barrier(MPI_COMM_WORLD); -#endif - unsigned int verbose = (world_rank == 0); - // - const int grid_dim = runmode->nbgridcells; - const int grid_size = nbgridcells; - const int loc_grid_size = nbgridcells/world_size; - // - double y_len = fabs(frame->ymax - frame->ymin); - int y_len_loc = nbgridcells/world_size; - int y_pos_loc = (int) world_rank*y_len_loc; - int y_bound = y_len_loc; - // - if ((world_rank + 1) != world_size) y_bound++; - // - const double dx = (frame->xmax - frame->xmin)/(nbgridcells - 1); - const double dy = (frame->ymax - frame->ymin)/(nbgridcells - 1); - // - int images_total = 0; - int index = 0; - // - for( int source_id = 0; source_id < nsets; source_id ++) - { - // number of images in the image plane for the specific image (1,3,5...) - const unsigned short int nimages = nimages_strongLensing[source_id]; - //____________________________ image (constrains) loop ________________________________ - for(unsigned short int image_id = 0; image_id < nimages; image_id++) - { - // - //struct point image_pos [MAXIMPERSOURCE]; - // - int loc_images_found = 0; - // -#pragma omp parallel -#pragma omp for reduction(+: images_total) - for (int y_id = 0; y_id < (y_bound - 1); ++y_id) - { - for (int x_id = 0; x_id < runmode->nbgridcells - 1 ; ++x_id) - { - //int yy_pos = MIN(y_pos_loc + y_id, runmode->nbgridcells - 1); - images_total++; - // - double x_pos = frame->xmin + ( x_id)*dx; - double y_pos = frame->ymin + (y_pos_loc + y_id)*dy; - // - // Define the upper + lower triangle, both together = square = pixel - // - struct triplet Tsup, Tinf; - // - Tsup.a.x = x_pos; - Tsup.b.x = x_pos; - Tsup.c.x = x_pos + dx; - Tinf.a.x = x_pos + dx; - Tinf.b.x = x_pos + dx; - Tinf.c.x = x_pos; - // - Tsup.a.y = y_pos; - Tsup.b.y = y_pos + dy; - Tsup.c.y = y_pos; - Tinf.a.y = y_pos + dy; - // - Tsup.c.y = y_pos; - Tinf.a.y = y_pos + dy; - Tinf.b.y = y_pos; - Tinf.c.y = y_pos + dy; - // - // Lens to Sourceplane conversion of triangles - // - struct triplet Timage; - struct triplet Tsource; - // - double dr = sources[INDEX2D(source_id, image_id)].dr; - mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_upper(&Tsup, dr, &Tsource, grid_gradient_x, grid_gradient_y, y_id*grid_dim + x_id, grid_dim); - // - int thread_found_image = 0; - // - if (mychi_insideborder(&sources[INDEX2D(source_id, image_id)].center, &Tsource) == 1) - { - thread_found_image = 1; - Timage = Tsup; - } - else - { - mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_lower(&Tinf, sources[INDEX2D(source_id, image_id)].dr, &Tsource, grid_gradient_x, grid_gradient_y, y_id*grid_dim + x_id, grid_dim); - if (mychi_inside(&sources[INDEX2D(source_id, image_id)].center, &Tsource) == 1) - { - thread_found_image = 1; - Timage = Tinf; - } - } -#if 1 - if (thread_found_image) - { -#pragma omp critical - { - // get the barycenter of the triangle - image_pos[INDEX3D(source_id, image_id, loc_images_found)] = mychi_barycenter(&Timage); - locimagesfound[INDEX2D(source_id, image_id)]++; - loc_images_found++; - *numimg = *numimg + 1; - } - } -#endif - } - } -//#if 1 - } - index += nimages_strongLensing[source_id]; - } -} diff --git a/Benchmarks/ChiBenchmark/delense_CPU.hpp b/Benchmarks/ChiBenchmark/delense_CPU.hpp deleted file mode 100644 index 758950f..0000000 --- a/Benchmarks/ChiBenchmark/delense_CPU.hpp +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#ifdef __WITH_MPI -#include -#endif - -#include -#include -#include -#ifdef __AVX512F__ -#include "gradient_avx512f.hpp" -#else -#include -#endif -#include "delense_CPU_utils.hpp" - - -// -//void delense_barycenter(struct point*** image_pos, int* locimagesfound, int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy* sources, double* grid_gradient_x, double* grid_gradient_y); -void delense_barycenter(struct point* image_pos, int* locimagesfound, int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy* sources, double* grid_gradient_x, double* grid_gradient_y); -// -//void delense(int nsets, int nimagestot, struct point* image_pos[nsets][nimagestot][MAXIMPERSOURCE], int* locimagesfound[nsets][nimagestot], int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy sources[nsets][nimagestot], double* grid_gradient_x, double* grid_gradient_y); -//void delense(int nsets, int nimagestot, struct point image_pos[nsets][nimagestot][MAXIMPERSOURCE], int locimagesfound[nsets][nimagestot], int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy sources[nsets][nimagestot], double* grid_gradient_x, double* grid_gradient_y); -void delense(struct point* image_pos, int* locimagesfound, int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy* sources, double* grid_gradient_x, double* grid_gradient_y); diff --git a/Benchmarks/ChiBenchmark/delense_GPU.cu b/Benchmarks/ChiBenchmark/delense_GPU.cu deleted file mode 100644 index 2c631ef..0000000 --- a/Benchmarks/ChiBenchmark/delense_GPU.cu +++ /dev/null @@ -1,351 +0,0 @@ -// This file is part of lenstoolHPC -// authors: gilles.fourestey@epfl.ch - -#include "delense_CPU_utils.hpp" -#include "delense_GPU_utils.cuh" -#include "structure_hpc.hpp" -#include "cudafunctions.cuh" -#ifdef __WITH_MPI -#include -#endif - -extern double myseconds(); - -#define BLOCK_SIZE_X 16 -#define BLOCK_SIZE_Y 16 - - -#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) - - -inline -void -moveToGPU(void** d_mem, void* h_mem, int size) -{ -#ifdef __WITH_UM - cudasafe(cudaMallocManaged((void**) d_mem, size), "memory allocation (UM)"); - cudasafe(cudaMemcpy(*d_mem, h_mem, size, cudaMemcpyHostToDevice), "H2D memory copy (UM)"); -#else - cudasafe(cudaMalloc((void**) d_mem, size), "memory allocation"); - cudasafe(cudaMemcpy(*d_mem, h_mem, size, cudaMemcpyHostToDevice), "H2D memory copy"); -#endif -} - -inline -void -moveFromGPU(void* h_mem, void* d_mem, int size) -{ -#ifdef __WITH_UM - //cudasafe(cudaMemcpy(h_mem, d_mem, size, cudaMemcpyDeviceToHost), "D2H memory copy (UM)"); - memcpy(h_mem, d_mem, size); -#else - cudasafe(cudaMemcpy(h_mem, d_mem, size, cudaMemcpyHostToDevice), "D2H memory copy"); -#endif -} -// -// -// -__global__ -void -delense_GPU(struct point* image_pos, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const struct galaxy* sources, const int y_pos_loc, const int y_bound, const int source_id, double* grid_gradient_x, double* grid_gradient_y) -{ -#define INDEX2D_BAR(y, x) (MAXIMPERSOURCE*y + x) - // - int nbgridcells_x = runmode->nbgridcells; - int nbgridcells_y = runmode->nbgridcells; - // - int col = blockIdx.x*blockDim.x + threadIdx.x; - if (col > nbgridcells_x - 2) return; - // - int num_blocks_y = blockDim.y; - int row_y = threadIdx.y; - int block_size_y = y_bound/num_blocks_y; - // - const double dx = (frame->xmax - frame->xmin)/(runmode->nbgridcells - 1); - const double dy = (frame->ymax - frame->ymin)/(runmode->nbgridcells - 1); - // - int img_pos = 0; - // - int row_s = (row_y + 0)*block_size_y; - int row_e = MIN((row_y + 1)*block_size_y, nbgridcells_y); - // - // - //for (int row = y_pos_loc; row < y_pos_loc + y_bound - 1; ++row) - int yp = y_pos_loc + row_s; - // - //printf("block id = %d, from %d to %d\n", row_y, yp, yp + block_size_y); - // - for (int row = 0; row < block_size_y - 0; ++row) - { - // - int y_id = row; - int x_id = col; - // - // int images_total; - // - double x_pos = frame->xmin + ( x_id + 0 )*dx; - double y_pos = frame->ymin + ( y_id + yp )*dy; - // - // Define the upper + lower triangle, both together = square = pixel - // - struct triplet Tsup, Tinf; - // - Tsup.a.x = x_pos; - Tsup.b.x = x_pos; - Tsup.c.x = x_pos + dx; - Tinf.a.x = x_pos + dx; - Tinf.b.x = x_pos + dx; - Tinf.c.x = x_pos; - // - Tsup.a.y = y_pos; - Tsup.b.y = y_pos + dy; - Tsup.c.y = y_pos; - Tinf.a.y = y_pos + dy; - Tinf.b.y = y_pos; - Tinf.c.y = y_pos + dy; - // - // Lens to Sourceplane conversion of triangles - // - // double time = -myseconds(); - // - struct triplet Timage; - struct triplet Tsource; - // - int g_loc = (y_id + row_s)*nbgridcells_x + x_id; - // - mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_upper_GPU(&Tsup, sources[source_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, g_loc, nbgridcells_x); - // - //int thread_found_image = 0; - // - if (mychi_insideborder_GPU(&sources[source_id].center, &Tsource) == 1) - { -// printf("col = %d, %d, row = %d, %d, img_pos = %d\n", col, col*num_blocks_y*MAXIMPERSOURCE, row_y, row_y*MAXIMPERSOURCE, img_pos); - //printf("found source = %d, row = %d, index 1 = %d, index 2 = %d, img_pos = %d\n", source_id, row + yp, col*num_blocks_y*MAXIMPERSOURCE, row_y*MAXIMPERSOURCE, img_pos); - image_pos[col*num_blocks_y*MAXIMPERSOURCE + row_y*MAXIMPERSOURCE + img_pos + 0] = mychi_barycenter_GPU(&Tsup); - img_pos++; - } - else - { - mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_lower_GPU(&Tinf, sources[source_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, g_loc, nbgridcells_x); - if (mychi_inside_GPU(&sources[source_id].center, &Tsource) == 1) - { -// printf("col = %d, %d, row = %d, %d, img_pos = %d\n", col, col*num_blocks_y*MAXIMPERSOURCE, row_y, row_y*MAXIMPERSOURCE, img_pos); - //printf("found source = %d, row = %d, index 1 = %d, index 2 = %d, img_pos = %d\n", source_id, row, col*num_blocks_y*MAXIMPERSOURCE, row_y*MAXIMPERSOURCE, img_pos); - image_pos[col*num_blocks_y*MAXIMPERSOURCE + row_y*MAXIMPERSOURCE + img_pos + 0] = mychi_barycenter_GPU(&Tinf); - img_pos++; - } - } - } -} -// -// -// -__global__ -void -delense_GPU_old(struct point* image_pos, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const struct galaxy* sources, const int y_pos_loc, const int y_bound, const int source_id, double* grid_gradient_x, double* grid_gradient_y) -{ -#define INDEX2D_BAR(y, x) (MAXIMPERSOURCE*y + x) - // - int nbgridcells_x = runmode->nbgridcells; - int nbgridcells_y = runmode->nbgridcells; - int row = blockIdx.x*blockDim.x + threadIdx.x; - if (row > y_bound) return; - int num_blocks_y = gridDim.y; - int block_size_y = nbgridcells_y/num_blocks_y; - int x_pos_loc = threadIdx.y*block_size_y; - // - //int col = blockIdx.y*blockDim.y + threadIdx.y; - //if (col > runmode->nbgridcells) return; - // - const double dx = (frame->xmax - frame->xmin)/(runmode->nbgridcells - 1); - const double dy = (frame->ymax - frame->ymin)/(runmode->nbgridcells - 1); - // - int img_pos = 0; - // - for (int col = 0; col < runmode->nbgridcells; ++col) - { - // - int y_id = row; - int x_id = col; - // - //int images_total; - // - double x_pos = frame->xmin + (x_pos_loc + x_id)*dx; - double y_pos = frame->ymin + (y_pos_loc + y_id)*dy; - // - // Define the upper + lower triangle, both together = square = pixel - // - struct triplet Tsup, Tinf; - // - Tsup.a.x = x_pos; - Tsup.b.x = x_pos; - Tsup.c.x = x_pos + dx; - Tinf.a.x = x_pos + dx; - Tinf.b.x = x_pos + dx; - Tinf.c.x = x_pos; - // - Tsup.a.y = y_pos; - Tsup.b.y = y_pos + dy; - Tsup.c.y = y_pos; - Tinf.a.y = y_pos + dy; - Tinf.b.y = y_pos; - Tinf.c.y = y_pos + dy; - // - // Lens to Sourceplane conversion of triangles - // - // double time = -myseconds(); - // - struct triplet Timage; - struct triplet Tsource; - // - mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_upper_GPU(&Tsup, sources[source_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, y_id*runmode->nbgridcells + x_id, runmode->nbgridcells); - // - int thread_found_image = 0; - // - if (mychi_insideborder_GPU(&sources[source_id].center, &Tsource) == 1) - { - image_pos[row*MAXIMPERSOURCE + img_pos + 0] = mychi_barycenter_GPU(&Tsup); - //printf("%d %d: T = %f %f\n", row, img_pos, image_pos[row*MAXIMPERSOURCE + img_pos + 0].x, image_pos[row*MAXIMPERSOURCE + img_pos + 0].y); - img_pos++; - } - else - { - mychi_transformtriangleImageToSourcePlane_SOA_grid_gradient_lower_GPU(&Tinf, sources[source_id].dr, &Tsource, grid_gradient_x, grid_gradient_y, y_id*runmode->nbgridcells + x_id, runmode->nbgridcells); - if (mychi_inside_GPU(&sources[source_id].center, &Tsource) == 1) - { - image_pos[row*MAXIMPERSOURCE + img_pos + 0] = mychi_barycenter_GPU(&Tinf); - //printf("%d %d: T = %f %f\n", row, img_pos, image_pos[row*MAXIMPERSOURCE + img_pos + 0].x, image_pos[row*MAXIMPERSOURCE + img_pos + 0].y); - img_pos++; - } - } - } -} -// -// -// -#if 1 -void delense_barycenter_GPU(struct point* image_pos, int* locimagesfound, int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy* sources, double* grid_gradient_x, double* grid_gradient_y) -{ -#define INDEX2D_BAR(y, x) (MAXIMPERSOURCE*y + x) - //const unsigned int nimagestot = runmode->nimagestot; - const unsigned int nsets = runmode->nsets; - const unsigned int nbgridcells = runmode->nbgridcells; - const unsigned int nbgridcells_x = runmode->nbgridcells; - const unsigned int nbgridcells_y = runmode->nbgridcells; - // - int world_size = 1; - int world_rank = 0; - // -#ifdef __WITH_MPI - MPI_Comm_size(MPI_COMM_WORLD, &world_size); - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); -#endif - // - unsigned int verbose = (world_rank == 0); - // - int numimg_loc = 0; - // - for (int ii = 0; ii < nsets; ++ii) - numimg_loc += nimages_strongLensing[ii]; - // - //const int nbgridcells = runmode->nbgridcells; - const int grid_size = nbgridcells; - const int loc_grid_size = nbgridcells/world_size; - // - double y_len = fabs(frame->ymax - frame->ymin); - int y_len_loc = nbgridcells/world_size; - int y_pos_loc = (int) world_rank*y_len_loc; - int y_bound = y_len_loc; - // - if ((world_rank + 1) != world_size) y_bound += 1; - // - printf("%d: numimg_loc = %d, y_len_loc = %d, y_pos_loc = %d, y_bound = %d\n", world_rank, numimg_loc, y_len_loc, y_pos_loc, y_bound); - // - const double dx = (frame->xmax - frame->xmin)/(nbgridcells - 1); - const double dy = (frame->ymax - frame->ymin)/(nbgridcells - 1); - // - int images_total = 0; - int index = 0; - // - int block_size_x = BLOCK_SIZE_X; - int block_size_y = BLOCK_SIZE_Y; - // - int GRID_SIZE_X = (nbgridcells_y + block_size_y - 1)/block_size_y; // number of blocks - int GRID_SIZE_Y = 1; - // - dim3 threads(block_size_x, block_size_y); - dim3 grid (GRID_SIZE_X , GRID_SIZE_Y); - // - double alloc_time = -myseconds(); - struct galaxy* sources_gpu; - moveToGPU((void**) &sources_gpu, (void*) sources, sizeof(galaxy)*nsets); - // - struct grid_param* frame_gpu; - moveToGPU((void**) &frame_gpu , (void*) frame , sizeof(grid_param)); - // - struct runmode_param* runmode_gpu; - moveToGPU((void**) &runmode_gpu, (void*) runmode, sizeof(runmode_param)); - // - struct point* image_pos_gpu; - cudaMallocManaged((void**) &image_pos_gpu, grid_size*block_size_y*MAXIMPERSOURCE*sizeof(struct point)); - // - //printf("image_pos size = %d\n", grid_size*block_size_y*MAXIMPERSOURCE); - // -#if 1 - // - memset(locimagesfound, 0, nsets*sizeof(int)); - // - cudaDeviceSynchronize(); - cudasafe(cudaGetLastError(), "before image_pos_gpu allocation "); - alloc_time += myseconds(); - // -#endif - // - double time_gpu, time_cpu; - // - time_gpu = 0.; - time_cpu = 0.; - // - for( int source_id = 0; source_id < nsets; source_id ++) - { - int loc_images_found = 0; - // - int x_pos_loc = 0; - cudasafe(cudaGetLastError(), "before delense_GPU"); - time_gpu -= myseconds(); - // - delense_GPU<<>> (image_pos_gpu, runmode_gpu, lens, frame_gpu, sources_gpu, y_pos_loc, y_bound, source_id, grid_gradient_x, grid_gradient_y); - // - cudaDeviceSynchronize(); - cudasafe(cudaGetLastError(), "after delense_GPU"); - time_gpu += myseconds(); - fflush(stdout); -#if 1 - time_cpu -= myseconds(); - int numimg_cpu = 0; -#pragma omp parallel for reduction(+ : numimg_cpu) - for (int ii = 0; ii < block_size_y*nbgridcells_x*MAXIMPERSOURCE; ++ii) - { - struct point T = image_pos_gpu[ii]; - if((T.x != 0.) || (T.y != 0.)) - { -#pragma omp critical - { - image_pos[source_id*MAXIMPERSOURCE + locimagesfound[source_id]] = T; - locimagesfound[source_id]++; - numimg_cpu++; - } - image_pos_gpu[ii].x = image_pos_gpu[ii].y = 0.; - } - } - //memset(image_pos_gpu, 0, nbgridcells*MAXIMPERSOURCE*sizeof(struct point)); - //memset(image_pos_gpu, 0, loc_grid_size*MAXIMPERSOURCE*sizeof(struct point)); - *numimg = *numimg + numimg_cpu; - time_cpu += myseconds(); - //memset(image_pos_gpu, 0, 2*nbgridcells*nbgridcells*sizeof(struct point)); -#endif - } - cudaFree(image_pos_gpu); - printf("num img found = %d, gpu time = %f, cpu time = %f\n", *numimg, time_gpu, time_cpu); -} -#endif diff --git a/Benchmarks/ChiBenchmark/delense_GPU.cuh b/Benchmarks/ChiBenchmark/delense_GPU.cuh deleted file mode 100644 index eca9c20..0000000 --- a/Benchmarks/ChiBenchmark/delense_GPU.cuh +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#ifdef __WITH_MPI -#include -#endif - -#include -#include -#include -#ifdef __AVX512F__ -#include "gradient_avx512f.hpp" -#else -#include -#endif -#include "delense_CPU_utils.hpp" - - -// -//void delense_barycenter(struct point*** image_pos, int* locimagesfound, int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy* sources, double* grid_gradient_x, double* grid_gradient_y); -void delense_barycenter(struct point* image_pos, int* locimagesfound, int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy* sources, double* grid_gradient_x, double* grid_gradient_y); -// -//void delense(int nsets, int nimagestot, struct point* image_pos[nsets][nimagestot][MAXIMPERSOURCE], int* locimagesfound[nsets][nimagestot], int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy sources[nsets][nimagestot], double* grid_gradient_x, double* grid_gradient_y); -//void delense(int nsets, int nimagestot, struct point image_pos[nsets][nimagestot][MAXIMPERSOURCE], int locimagesfound[nsets][nimagestot], int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy sources[nsets][nimagestot], double* grid_gradient_x, double* grid_gradient_y); -// -void delense(struct point* image_pos, int* locimagesfound, int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy* sources, double* grid_gradient_x, double* grid_gradient_y); -// -void delense_barycenter_GPU(struct point* image_pos, int* locimagesfound, int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy* sources, double* grid_gradient_x, double* grid_gradient_y); -// -void delense_barycenter_GPU_new(struct point* image_pos, int* locimagesfound, int* numimg, const runmode_param *runmode, const struct Potential_SOA *lens, const struct grid_param *frame, const int *nimages_strongLensing, const struct galaxy* sources, double* grid_gradient_x, double* grid_gradient_y); -// diff --git a/Benchmarks/ChiBenchmark/mpi_check.h b/Benchmarks/ChiBenchmark/mpi_check.h deleted file mode 100644 index cf3605f..0000000 --- a/Benchmarks/ChiBenchmark/mpi_check.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#include - -#ifndef MPI_CHECK -#define MPI_CHECK(stmt) \ - do \ - { \ - const int code = stmt; \ - \ - if (code != MPI_SUCCESS) \ - { \ - char error_string[2048]; \ - int length_of_error_string = sizeof(error_string); \ - MPI_Error_string(code, error_string, &length_of_error_string); \ - \ - fprintf(stderr, \ - "ERROR!\n" #stmt " mpiAssert: %s %d %s\n", \ - __FILE__, __LINE__, error_string); \ - fflush(stderr); \ - \ - MPI_Abort(MPI_COMM_WORLD, code); \ - } \ - } \ - while(0) -#endif diff --git a/Benchmarks/ChiBenchmark/timer.h b/Benchmarks/ChiBenchmark/timer.h deleted file mode 100644 index 781c8f7..0000000 --- a/Benchmarks/ChiBenchmark/timer.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once -// -#include -extern "C"{ -double myseconds() -{ - struct timeval tp; - struct timezone tzp; - // - int i = gettimeofday(&tp,&tzp); - // - return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); -} -} - - -static inline unsigned long long cycles() -{ - unsigned long long u; - __asm__ volatile ("rdtscp;shlq $32,%%rdx;orq %%rdx,%%rax;movq %%rax,%0":"=q"(u)::"%rax", "%rdx", "rcx"); - return u; -}