diff --git a/GradientBenchmark/.settings/language.settings.xml b/GradientBenchmark/.settings/language.settings.xml new file mode 100644 index 0000000..970055b --- /dev/null +++ b/GradientBenchmark/.settings/language.settings.xml @@ -0,0 +1,12 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<project> + <configuration id="cdt.managedbuild.toolchain.gnu.base.2141909965" name="Default"> + <extension point="org.eclipse.cdt.core.LanguageSettingsProvider"> + <provider copy-of="extension" id="org.eclipse.cdt.ui.UserLanguageSettingsProvider"/> + <provider-reference id="org.eclipse.cdt.core.ReferencedProjectsLanguageSettingsProvider" ref="shared-provider"/> + <provider class="org.eclipse.cdt.managedbuilder.language.settings.providers.GCCBuildCommandParser" id="org.eclipse.cdt.managedbuilder.core.GCCBuildCommandParser" keep-relative-paths="false" name="CDT GCC Build Output Parser" parameter="(gcc)|([gc]\+\+)|(clang)" prefer-non-shared="true"/> + <provider-reference id="org.eclipse.cdt.managedbuilder.core.GCCBuiltinSpecsDetector" ref="shared-provider"/> + <provider-reference id="org.eclipse.cdt.managedbuilder.core.MBSLanguageSettingsProvider" ref="shared-provider"/> + </extension> + </configuration> +</project> diff --git a/GradientBenchmark/BenchmarkGradSoA.txt b/GradientBenchmark/BenchmarkGradSoA.txt new file mode 100644 index 0000000..f84fc80 --- /dev/null +++ b/GradientBenchmark/BenchmarkGradSoA.txt @@ -0,0 +1,4 @@ +Benchmark for Gradient Calculation +Sample size 10: 3.4e-05 +Sample size 100: 4e-05 +Sample size 1000: 0.0004 diff --git a/GradientBenchmark/GradienBenchmark b/GradientBenchmark/GradienBenchmark new file mode 100755 index 0000000..b664328 Binary files /dev/null and b/GradientBenchmark/GradienBenchmark differ diff --git a/GradientBenchmark/main.cpp b/GradientBenchmark/main.cpp index 21b5186..85ab715 100644 --- a/GradientBenchmark/main.cpp +++ b/GradientBenchmark/main.cpp @@ -1,279 +1,310 @@ /** * @file main.cpp * @Author Christoph Schaaefer, EPFL (christophernstrerne.schaefer@epfl.ch) * @date October 2016 * @brief Benchmark for gradhalo function */ #include <iostream> #include <string.h> #include <cuda_runtime.h> #include "structure.h" #include <math.h> #include <sys/time.h> #include <fstream> /** for both gradient and second derivatives **/ static struct point rotateCoordinateSystem(struct point P, double theta); /** gradient **/ -struct point module_potentialDerivatives_totalGradient(const runmode_param *runmode, const struct point *pImage, const struct Potential *lens); -static struct point grad_halo(const struct point *pImage, const struct Potential *lens); +struct point module_potentialDerivatives_totalGradient(const runmode_param *runmode, const struct point *pImage, PotentialSet *lens ); +static struct point grad_halo(const struct point *pImage, int iterator,PotentialSet *lens); /** PIEMD **/ static complex piemd_1derivatives_ci05(double x, double y, double eps, double rc); /** Potential **/ void module_readParameters_calculatePotentialparameter(Potential *lens); int main() { //Constant int small(10); int medium(100); int big(1000); //Variable creation struct timeval t1, t2, t3, t4; runmode_param runmodesmall; runmode_param runmodemedium; runmode_param runmodebig; point image; Potential *ilens; Potential lens[big]; //Initialisation runmodesmall.nhalos = small; runmodemedium.nhalos = medium; runmodebig.nhalos = big; image.x = image.y = 2; for (int i = 0; i <big; ++i){ ilens = &lens[i]; ilens->position.x = ilens->position.y = 0.; ilens->type = 8; ilens->ellipticity = 0.11; ilens->ellipticity_potential = 0.; ilens->ellipticity_angle = 0.; ilens->rcut = 5.; ilens->rcore = 1; ilens->weight = 0; ilens->rscale = 0; ilens->exponent = 0; ilens->alpha = 0.; ilens->einasto_kappacritic = 0; ilens->z = 0.4; module_readParameters_calculatePotentialparameter(ilens); } +/** SoA part **/ + +//Init PotentialSet + +PotentialSet lenses; +lenses.type = new int[big]; +lenses.x = new double[big]; +lenses.y = new double[big]; +lenses.b0 = new double[big]; +lenses.ellipticity_angle = new double[big]; +lenses.ellipticity = new double[big]; +lenses.ellipticity_potential = new double[big]; +lenses.rcore = new double[big]; +lenses.rcut = new double[big]; +lenses.z = new double[big]; + +for (int i = 0; i <big; ++i){ + lenses.type[i] = lens[i].type; + lenses.x[i] = lens[i].position.x; + lenses.y[i] = lens[i].position.y; + lenses.b0[i] = lens[i].b0; + lenses.ellipticity_angle[i] = lens[i].ellipticity_angle; + lenses.ellipticity[i] = lens[i].ellipticity; + lenses.ellipticity_potential[i] = lens[i].ellipticity_potential; + lenses.rcore[i] = lens[i].rcore; + lenses.rcut[i] = lens[i].rcut; + lenses.z[i] = lens[i].z; + +} + + gettimeofday(&t1, 0); -module_potentialDerivatives_totalGradient(&runmodesmall,&image, lens); +module_potentialDerivatives_totalGradient(&runmodesmall,&image, &lenses); gettimeofday(&t2, 0); -module_potentialDerivatives_totalGradient(&runmodemedium,&image, lens); +module_potentialDerivatives_totalGradient(&runmodemedium,&image, &lenses); gettimeofday(&t3, 0); -module_potentialDerivatives_totalGradient(&runmodebig,&image, lens); +module_potentialDerivatives_totalGradient(&runmodebig,&image, &lenses); gettimeofday(&t4, 0); double time1 = (1000000.0*(t2.tv_sec-t1.tv_sec) + t2.tv_usec-t1.tv_usec)/1000000.0; double time2 = (1000000.0*(t3.tv_sec-t2.tv_sec) + t3.tv_usec-t2.tv_usec)/1000000.0; double time3 = (1000000.0*(t4.tv_sec-t3.tv_sec) + t4.tv_usec-t3.tv_usec)/1000000.0; std::cout << "Benchmark for Gradient Calculation "<< std::endl; std::cout << "Sample size " << small << ": " << time1 << std::endl; std::cout << "Sample size " << medium << ": " << time2 << std::endl; std::cout << "Sample size " << big << ": " << time3 << std::endl; std::ofstream myfile; -myfile.open ("BenchmarkGrad.txt"); +myfile.open ("BenchmarkGradSoA.txt"); myfile << "Benchmark for Gradient Calculation "<< std::endl; myfile << "Sample size " << small << ": " << time1 << std::endl; myfile << "Sample size " << medium << ": " << time2 << std::endl; myfile << "Sample size " << big << ": " << time3 << std::endl; myfile.close(); } -struct point module_potentialDerivatives_totalGradient(const runmode_param *runmode, const struct point *pImage, const struct Potential *lens) +struct point module_potentialDerivatives_totalGradient(const runmode_param *runmode, const struct point *pImage, PotentialSet *lens ) { struct point grad, clumpgrad; grad.x=0; grad.y=0; for(int i=0; i<runmode->nhalos; i++){ - clumpgrad=grad_halo(pImage,&lens[i]); //compute gradient for each clump separately + clumpgrad=grad_halo(pImage,i,lens); //compute gradient for each clump separately if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y){ //nan check grad.x+=clumpgrad.x; grad.y+=clumpgrad.y; } // add the gradients } return(grad); } /**@brief Return the gradient of the projected lens potential for one clump * !!! You have to multiply by dlsds to obtain the true gradient * for the expressions, see the papers : JP Kneib & P Natarajan, Cluster Lenses, The Astronomy and Astrophysics Review (2011) for 1 and 2 * and JP Kneib PhD (1993) for 3 * * @param pImage point where the result is computed in the lens plane * @param lens mass distribution */ -static struct point grad_halo(const struct point *pImage, const struct Potential *lens) +static struct point grad_halo(const struct point *pImage, int iterator,PotentialSet *lens) { struct point true_coord, true_coord_rotation, result; double R, angular_deviation; complex zis; result.x = result.y = 0.; /*positionning at the potential center*/ - true_coord.x = pImage->x - lens->position.x; // Change the origin of the coordinate system to the center of the clump - true_coord.y = pImage->y - lens->position.y; + true_coord.x = pImage->x - lens->x[iterator]; // Change the origin of the coordinate system to the center of the clump + true_coord.y = pImage->y - lens->y[iterator]; - switch (lens->type) + switch (lens->type[iterator]) { case(5): /*Elliptical Isothermal Sphere*/ /*rotation of the coordiante axes to match the potential axes*/ - true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle); + true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[iterator]); - R=sqrt(true_coord_rotation.x*true_coord_rotation.x*(1-lens->ellipticity/3.)+true_coord_rotation.y*true_coord_rotation.y*(1+lens->ellipticity/3.)); //ellippot = ellipmass/3 - result.x=(1-lens->ellipticity/3.)*lens->b0*true_coord_rotation.x/(R); - result.y=(1+lens->ellipticity/3.)*lens->b0*true_coord_rotation.y/(R); + R=sqrt(true_coord_rotation.x*true_coord_rotation.x*(1-lens->ellipticity[iterator]/3.)+true_coord_rotation.y*true_coord_rotation.y*(1+lens->ellipticity[iterator]/3.)); //ellippot = ellipmass/3 + result.x=(1-lens->ellipticity[iterator]/3.)*lens->b0[iterator]*true_coord_rotation.x/(R); + result.y=(1+lens->ellipticity[iterator]/3.)*lens->b0[iterator]*true_coord_rotation.y/(R); break; case(8): /* PIEMD */ /*rotation of the coordiante axes to match the potential axes*/ - true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle); + true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[iterator]); /*Doing something....*/ - zis = piemd_1derivatives_ci05(true_coord_rotation.x, true_coord_rotation.y, lens->ellipticity_potential, lens->rcore); + zis = piemd_1derivatives_ci05(true_coord_rotation.x, true_coord_rotation.y, lens->ellipticity_potential[iterator], lens->rcore[iterator]); - result.x=lens->b0 * zis.re; - result.y=lens->b0 * zis.im; + result.x=lens->b0[iterator] * zis.re; + result.y=lens->b0[iterator] * zis.im; break; default: - std::cout << "ERROR: Grad 1 profil type of clump "<< lens->name << " unknown : "<< lens->type << std::endl; + std::cout << "ERROR: Grad 1 profil type of clump unknown : "<< lens->type[iterator] << std::endl; break; }; return result; } /**** usefull functions for PIEMD profile : see old lenstool ****/ /** I*w,v=0.5 Kassiola & Kovner, 1993 PIEMD, paragraph 4.1 * * Global variables used : * - none */ static complex piemd_1derivatives_ci05(double x, double y, double eps, double rc) { double sqe, cx1, cxro, cyro, rem2; complex zci, znum, zden, zis, zres; double norm; sqe = sqrt(eps); cx1 = (1. - eps) / (1. + eps); cxro = (1. + eps) * (1. + eps); cyro = (1. - eps) * (1. - eps); rem2 = x * x / cxro + y * y / cyro; /*zci=cpx(0.,-0.5*(1.-eps*eps)/sqe); znum=cpx(cx1*x,(2.*sqe*sqrt(rc*rc+rem2)-y/cx1)); zden=cpx(x,(2.*rc*sqe-y)); zis=pcpx(zci,lncpx(dcpx(znum,zden))); zres=pcpxflt(zis,b0);*/ // --> optimized code zci.re = 0; zci.im = -0.5 * (1. - eps * eps) / sqe; znum.re = cx1 * x; znum.im = 2.*sqe * sqrt(rc * rc + rem2) - y / cx1; zden.re = x; zden.im = 2.*rc * sqe - y; norm = zden.re * zden.re + zden.im * zden.im; // zis = znum/zden zis.re = (znum.re * zden.re + znum.im * zden.im) / norm; zis.im = (znum.im * zden.re - znum.re * zden.im) / norm; norm = zis.re; zis.re = log(sqrt(norm * norm + zis.im * zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) zis.im = atan2(zis.im, norm); // norm = zis.re; zres.re = zci.re * zis.re - zci.im * zis.im; // Re( zci*ln(zis) ) zres.im = zci.im * zis.re + zis.im * zci.re; // Im( zci*ln(zis) ) //zres.re = zis.re*b0; //zres.im = zis.im*b0; return(zres); } /// Useful functions // changes the coordinates of point P into a new basis (rotation of angle theta) // y' y x' // * | / // * | / theta // * | / // *|--------->x static struct point rotateCoordinateSystem(struct point P, double theta) { struct point Q; Q.x = P.x*cos(theta) + P.y*sin(theta); Q.y = P.y*cos(theta) - P.x*sin(theta); return(Q); } /** @brief This module function calculates profile depended information like the impactparameter b0 and the potential ellipticity epot * * @param lens: mass distribution for which to calculate parameters */ void module_readParameters_calculatePotentialparameter(Potential *lens){ switch (lens->type) { case(5): /*Elliptical Isothermal Sphere*/ //impact parameter b0 lens->b0 = 4* pi_c2 * lens->vdisp * lens->vdisp ; //ellipticity_potential lens->ellipticity_potential = lens->ellipticity/3 ; break; case(8): /* PIEMD */ //impact parameter b0 lens->b0 = 6.*pi_c2 * lens->vdisp * lens->vdisp; //ellipticity_parameter if ( lens->ellipticity == 0. && lens->ellipticity_potential != 0. ){ // emass is (a2-b2)/(a2+b2) lens->ellipticity = 2.*lens->ellipticity_potential / (1. + lens->ellipticity_potential * lens->ellipticity_potential); //printf("1 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } else if ( lens->ellipticity == 0. && lens->ellipticity_potential == 0. ){ lens->ellipticity_potential = 0.00001; //printf("2 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } else{ // epot is (a-b)/(a+b) lens->ellipticity_potential = (1. - sqrt(1 - lens->ellipticity * lens->ellipticity)) / lens->ellipticity; //printf("3 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } break; default: std::cout << "ERROR: LENSPARA profil type of clump "<< lens->name << " unknown : "<< lens->type << std::endl; //printf( "ERROR: LENSPARA profil type of clump %s unknown : %d\n",lens->name, lens->type); break; }; } diff --git a/GradientBenchmark/main.o b/GradientBenchmark/main.o new file mode 100644 index 0000000..e8a94c1 Binary files /dev/null and b/GradientBenchmark/main.o differ diff --git a/GradientBenchmark/structure.h b/GradientBenchmark/structure.h index 17a2688..d129dd7 100644 --- a/GradientBenchmark/structure.h +++ b/GradientBenchmark/structure.h @@ -1,501 +1,520 @@ /** * @file structure.h * @Author Thomas Jalabert, EPFL (me@example.com) , Christoph Schaefer, EPFL (christophernstrerne.schaefer@epfl.ch) * @date July 2015 * @version 0,1 * @brief Header file to define the used structures (e.g. defined structs) * * @param configuration file (parameters.par) * @return Depends on choice in the configuration file, e.g. least chi2 model */ // Header guard #ifndef STRUCTURE_H #define STRUCTURE_H #include <iostream> /*****************************************************************/ /* */ /* Constants: Will be migrated to constants.h when there are too many of them*/ /* */ /*****************************************************************/ // GPU definitions #define threadsPerBlock 512 // threads for each set of images #define MAXIMPERSOURCE 20 // maximum number of multiple images for one source #define MAXIM 200 // maximum total number of images treated // Dimensions definitions #define NPZMAX 9 /* maximum number of critical lines in g_cline struct*/ //#define CLINESIZE 500 /* maximum number of critical and caustic points for Cline mode. Adjust depending on RAM*/ #define NPOTFILESIZE 2000 //maximum number of potential in potfiles #define DMIN 1e-4 // distance minimale de convergence dans le plan image (in arcsec) #define NITMAX 100 // gNFW definitions #define gNFW_ARRAY_SIZE 1800 // Set the dimension of the gnfw table gNFW_ARRAY_SIZE, must be 1800 for the current table file // Filename definition #define FILENAME_SIZE 50 // size of a filename in .par file //constants definition #define pi_c2 7.209970e-06 /* pi en arcsecond/ c^2 = 648000/vol/vol */ #define cH2piG 0.11585881 /* cH0/2piG en g/cm^2 (H0=50) */ #define cH4piG 0.057929405 /* cH0/4piG en g/cm^2 (H0=50) */ #define cH0_4piG 2.7730112e-4 /* cH0/4piG en 10^12 M_Sol/kpc^2 (H0=50) */ #define d0 29.068701 /* vol/h0*1000/rta -- c/H0 en h-1.kpc/arcsecond (h0=50)*/ #define MCRIT12 .2343165 /* c^3/4piGh0/RTA/RTA in 1e12 M_sol/arcsec^2 (h0=50) */ /*****************************************************************/ /* */ /* Types definition */ /* */ /*****************************************************************/ /** @brief Point: Structure of 2 coordinates * * @param x: X coordinate * @param y: Y coordinate */ struct point { double x; double y; }; /** @brief Complex: Structure of 2 doubles * @param re: Real Part * @param im: Imaginary Part */ struct complex { double re; double im; }; /** @brief Segment: Structure of two points */ struct segment { point a; point b; }; /** @brief triplet: Structure of three points defining a triangle */ struct triplet { struct point a; struct point b; struct point c; }; /** @brief bitriplet: Defines two linked triangles (one in source plane and one in image plane) * @param i: Triangle on image plane * @param s: Triangle on source plane */ struct bitriplet { struct triplet i; struct triplet s; }; /** @brief contains the table needed to compute the potential derivatives of general NFW profile */ typedef struct { double alpha_now, x_now, kappa, dpl; } gNFWdata; /** @brief Matrix: 2by2 doubles */ struct matrix { double a; double b; double c; double d; }; /** @brief ellipse: for shape computation * @param a: semimajor axis * @param b: semiminor axis * @param theta: shape ellipticity */ struct ellipse { double a; double b; double theta; }; /** @brief Storage type for sources, lenses and arclets * @param center: position of the center of galaxy * @param shape: shape of galaxy * @param mag: magnitude * @param redshift: redshift * @param dls: Distance lens-source * @param dos: Distance observer-source * @param dr: dls/dos */ struct galaxy { //char name[IDSIZE]; struct point center; struct ellipse shape; double mag; double redshift; double dls; double dos; double dr; }; /** @brief Contains the information for optimising a parameter in the inverse mode * @param block: blockorfree variable (whether a parameter is blocked or free for the mcmc algorithm) * @param min: lower optimisation value * @param max: upper optimisation value * @param sigma: optimisation step (MIGHT NOT BE TAKEN INTO ACCOUNT) */ struct optimize_block { int block; double min; double max; double sigma; }; /** @brief two optimize_block to simulate a point */ struct optimize_point // blockorfree for the point structure { struct optimize_block x; struct optimize_block y; }; /** @brief Contains the information for optimising the potential in the inverse mode * @param position: position of the center of the halo * @param weight: weight of the clump (the projected mass sigma0 for PIEMD, the density rhoscale for NFW) * @param b0: Impact parameter * @param ellipticity_angle: orientation of the clump * @param ellipticity: Mass ellipticity * @param ellipticity_potential: Potential ellipticity * @param rcore: PIEMD specific value * @param rcut: PIEMD specific value * @param rscale: scale radius for NFW, Einasto * @param exponent: exponent for Einasto * @param vdisp: Dispersion velocity * @param alpha: exponent for general NFW * @param einasto_kappacritic: critical kappa for Einasto profile * @param z: redshift */ struct potentialoptimization // block or free variable for the MCMC for the potential { struct optimize_point position; struct optimize_block weight; struct optimize_block b0; struct optimize_block ellipticity_angle; struct optimize_block ellipticity; struct optimize_block ellipticity_potential; struct optimize_block rcore; struct optimize_block rcut; struct optimize_block rscale; struct optimize_block exponent; struct optimize_block vdisp; struct optimize_block alpha; struct optimize_block einasto_kappacritic; struct optimize_block z; }; /** @brief Contains the information of the lens potential * @param type: 1=PIEMD , 2=NFW, 3=SIES, 4=point mass, 5=SIS, 8=PIEMD * @param type_name: IEMD, NFW, SIES, point * @param name: name of the clump (e.g. name of the galaxy) : not compulsory * @param position: position of the center of the halo * @param weight: weight of the clump (the projected mass sigma0 for PIEMD, the density rhoscale for NFW) * @param b0: Impact parameter * @param ellipticity_angle: * @param ellipticity: Mass ellipticity * @param ellipticity_potential: Potential ellipticity * @param rcore: PIEMD specific value * @param rcut: PIEMD specific value * @param rscale: scale radius for NFW, Einasto * @param exponent: exponent for Einasto * @param vdisp: Dispersion velocity * @param alpha: exponent for general NFW * @param einasto_kappacritic: critical kappa for Einasto profile * @param z: redshift */ struct Potential { int type; // 1=PIEMD ; 2=NFW; 3=SIES, 4=point mass char type_name[10]; // PIEMD, NFW, SIES, point char name[20]; // name of the clump (e.g. name of the galaxy) : not compulsory struct point position; // position of the center of the halo double weight; // weight of the clump (the projected mass sigma0 for PIEMD, the density rhoscale for NFW) double b0; // Impact parameter double vdisp; //Dispersion velocity double ellipticity_angle; // orientation of the clump double ellipticity; // ellipticity of the mass distribition double ellipticity_potential; //ellipticity of the potential double rcore; // core radius double rcut; // cut radius double rscale; // scale radius for NFW, Einasto double exponent; // exponent for Einasto double alpha; // exponent for general NFW double einasto_kappacritic; // critical kappa for Einasto profile double z; // redshift of the clump double mag; //magnitude double lum; //luminosity double theta; //theta double sigma; // sigma }; +/* @brief Contains the information of the lens potentials under + * SoA form instead of AoS form for better memory acess by the + * processor. + */ + +struct PotentialSet +{ + int *type; // 1=PIEMD ; 2=NFW; 3=SIES, 4=point mass + double *x; //x position + double *y; //y position + double *b0; // Impact parameter + double *ellipticity_angle; // orientation of the clump + double *ellipticity; // ellipticity of the mass distribition + double *ellipticity_potential; //ellipticity of the potential + double *rcore; // core radius + double *rcut; // cut radius + double *z; // redshift of the clump +}; + /*****************************************************************/ /* */ /* Control structure definition */ /* */ /*****************************************************************/ /** @brief Control structure for runmode parameters * * Default values are set in module_readParameters_readRunmode * * @param nbgridcells: Number of grid cells * @param source: flag for simple source to image conversion * @param sourfile: file name for source information * @param image: flag for simple image to source conversion * @param imafile: file name for image information * @param mass: flag for mass fitsfile * @param massgridcells: Nb of cell for fitsfile * @param z_mass: redshift for which to be computed * @param z_mass_s: redshift of source for which effect of mass will be computed * @param potential: flag for potential fitsfile * @param potgridcells: Nb of cell for fitsfile * @param z_pot: redshift for which to be computed * @param dpl: flag for displacement fitsfile * @param dplgridcells: Nb of cell for fitsfile * @param z_dpl: redshift for which to be computed * @param inverse: flag for inversion mode (MCMC etc,) * @param arclet: flag for arclet mode * @param debug: flag for debug mode * @param nimagestotal: total number of lensed images in file * @param nsets: number of sources attributed to the lensed images * @param nhalo: Number of halos * @param grid: 0 for automatic grid (not implemented), 1 for grid infor applying on source plane, 2 for grid information applying on image plane * @param nbgridcells: Number of grid cells * @param zgrid: redshift of grid * @param cline: flag for critical and caustic line calculation */ struct runmode_param { int nbgridcells; //Source Mode int source; std::string sourfile; int nsets; //Image Mode int image; std::string imagefile; int nimagestot; //Mass Mode int mass; int mass_gridcells; double z_mass; double z_mass_s; //Potential Mode int potential; int pot_gridcells; double z_pot; int nhalos; //Potfile Mode int potfile; int npotfile; std::string potfilename; //displacement Mode int dpl; int dpl_gridcells; double z_dpl; //Inverse Mode int inverse; //Arclet Mode int arclet; //Debug Mode int debug; //Grid Mode int grid; int gridcells; double zgrid; //Critic and caustic mode int cline; //Amplification Mode int amplif; int amplif_gridcells; double z_amplif; //Time/Benchmark mode int time; }; /** @brief Not used yet * */ struct image_param { }; /** @brief Not used yet * */ struct source_param { }; /** @brief Contains Grid information */ struct grid_param { double xmin; double xmax; double ymin; double ymax; double lmin; double lmax; double rmax; }; /** @brief Control structure for cosmological parameters * * @param model: Cosmological model * @param omegaM: * @param omegaX: * @param curvature: curvature parameter * @param wX: * @param wa: * @param H0: Hubble constant * @param h: H0/50 */ struct cosmo_param { int model; double omegaM; double omegaX; double curvature; double wX; double wa; double H0; double h; }; /** @brief Control structure for potfile parameters * * @param potid: 1: pot P, 2: pot Q @param ftype: @param potfile[FILENAME_SIZE]; @param type; @param zlens; @param core;CCC @param corekpc; @param mag0; @param select; @param ircut; @param cut, cut1, cut2; @param cutkpc1, cutkpc2; @param isigma; @param sigma, sigma1, sigma2; @param islope; @param slope, slope1, slope2; @param ivdslope; @param vdslope, vdslope1, vdslope2; @param ivdscat; @param vdscat, vdscat1, vdscat2; @param ircutscat; @param rcutscat, rcutscat1, rcutscat2; @param ia; // scaling relation of msm200 @param a, a1, a2; @param ib; // scaling relation of msm200 @param b, b1, b2; */ struct potfile_param { int potid; // 1: pot P, 2: pot Q int ftype; char potfile[FILENAME_SIZE]; int type; double zlens; double core; double corekpc; double mag0; int select; int ircut; double cut, cut1, cut2; double cutkpc1, cutkpc2; int isigma; double sigma, sigma1, sigma2; int islope; double slope, slope1, slope2; int ivdslope; double vdslope, vdslope1, vdslope2; int ivdscat; double vdscat, vdscat1, vdscat2; int ircutscat; double rcutscat, rcutscat1, rcutscat2; int ia; // scaling relation of msm200 double a, a1, a2; int ib; // scaling relation of msm200 double b, b1, b2; int npotfile; }; /** @brief Control structure for caustic and critical lines * * @param nplan: number of sourceplanes for which the caustic lines will be computed * @param cz: redshift values array for respective sourceplanes * @param dos: distcosmo1 to redshift z * @param dls: distcosmo2 between lens[0] and z * @param dlsds: ratio of dl0s/dos * @param limitLow: minimum size of the squares in MARCHINGSQUARES * @param dmax: Size of search area * @param xmin: * @param xmax: * @param ymin: * @param ymax: * @param limithigh: maximum size of the squares in MARCHINGSQUARES algorithm * @param nbgridcells: nbgridcells * nbgridcells = number of pixels for critical line computation */ struct cline_param { int nplan; double cz[NPZMAX]; double dos[NPZMAX]; // distcosmo1 to redshift z double dls[NPZMAX]; // distcosmo2 between lens[0] and z double dlsds[NPZMAX]; // ratio of dl0s/dos double limitLow; // minimum size of the squares in MARCHINGSQUARES or initial step size in SNAKE double dmax; double xmin; double xmax; double ymin; double ymax; double limitHigh; // maximum size of the squares in MARCHINGSQUARES algorithm int nbgridcells; // nbgridcells * nbgridcells = number of pixels for critical line computation }; #endif // if STRUCTURE_H diff --git a/GradientBenchmarkInitial/BenchmarkGrad.txt b/GradientBenchmarkInitial/BenchmarkGrad.txt new file mode 100644 index 0000000..6770b1c --- /dev/null +++ b/GradientBenchmarkInitial/BenchmarkGrad.txt @@ -0,0 +1,4 @@ +Benchmark for Gradient Calculation +Sample size 10: 2.2e-05 +Sample size 100: 2.4e-05 +Sample size 1000: 0.000236 diff --git a/GradientBenchmarkInitial/Makefile b/GradientBenchmarkInitial/Makefile new file mode 100644 index 0000000..4a5b17f --- /dev/null +++ b/GradientBenchmarkInitial/Makefile @@ -0,0 +1,77 @@ +PROGRAM_NAME := GradienBenchmark + +program_CXX_SRCS := $(wildcard *.cpp) +#program_CXX_SRCS += $(wildcard ../../*.cpp) #Find C++ source files from additonal directories +program_CXX_OBJS := ${program_CXX_SRCS:.cpp=.o} + +program_C_SRCS := $(wildcard *.c) +#program_CXX_SRCS += $(wildcard ../../*.c) #Find C source files from additonal directories +program_C_OBJS := ${program_C_SRCS:.c=.o} + +program_CU_SRCS := $(wildcard *.cu) +#program_CU_SRCS += $(wildcard ../../*.cu) #Find CUDA source files from additional directories +#program_CU_HEADERS := $(wildcard *.cuh) #Optional: Include .cuh files as dependencies +#program_CU_HEADERS += $(wildcard ../../*.cuh) #Find .cuh files from additional directories +program_CU_OBJS := ${program_CU_SRCS:.cu=.cuo} + +program_INCLUDE_DIRS := . /usr/local/cuda/include/ #C++ Include directories +program_INCLUDE_DIRS += /usr/include/cfitsio/ +#program_CU_INCLUDE_DIRS := /home/users/amclaugh/CUB/cub-1.3.2/ #CUDA Include directories + +program_INCLUDE_LIBS := /usr/lib64/ #Include libraries + +# Compiler flags +CPPFLAGS += $(foreach includedir,$(program_INCLUDE_DIRS),-I$(includedir)) +CPPFLAGS += $(foreach includelib,$(program_INCLUDE_LIBS),-L$(includelib) -lcfitsio) +CXXFLAGS += -g -O3 -std=c++0x -Wall -pedantic + +GEN_SM35 := -gencode=arch=compute_35,code=\"sm_35,compute_35\" #Target CC 3.5, for example +NVFLAGS := -O3 -g -G -rdc=true #rdc=true needed for separable compilation +#NVFLAGS += $(GEN_SM35) +NVFLAGS += $(foreach includedir,$(program_CU_INCLUDE_DIRS),-I$(includedir)) + + +CUO_O_OBJECTS := ${program_CU_OBJS:.cuo=.cuo.o} + +OBJECTS = $(program_CU_OBJS) $(program_CXX_OBJS) $(program_C_OBJS) + +.PHONY: all clean distclean + +all: $(PROGRAM_NAME) + +debug: CXXFLAGS = -g -O0 -std=c++0x -Wall -pedantic -DDEBUG $(EXTRA_FLAGS) +debug: NVFLAGS = -O0 $(GEN_SM35) -g -G +debug: NVFLAGS += $(foreach includedir,$(program_CU_INCLUDE_DIRS),-I$(includedir)) +debug: $(PROGRAM_NAME) + +# Rule for compilation of CUDA source (C++ source can be handled automatically) +%.cuo: %.cu %.cuh + nvcc $(NVFLAGS) $(CPPFLAGS) -o $@ -dc $< + +# This is pretty ugly...details below +# The program depends on both C++ and CUDA Objects, but storing CUDA objects as .o files results in circular dependency +# warnings from Make. However, nvcc requires that object files for linking end in the .o extension, else it will throw +# an error saying that it doesn't know how to handle the file. Using a non .o rule for Make and then renaming the file +# to have the .o extension for nvcc won't suffice because the program will depend on the non .o file but the files in +# the directory after compilation will have a .o suffix. Thus, when one goes to recompile the code all files will be +# recompiled instead of just the ones that have been updated. +# +# The solution below solves these issues by silently converting the non .o suffix needed by make to the .o suffix +# required by nvcc, calling nvcc, and then converting back to the non .o suffix for future, dependency-based +# compilation. +$(PROGRAM_NAME): $(OBJECTS) + @ for cu_obj in $(program_CU_OBJS); \ + do \ + mv $$cu_obj $$cu_obj.o; \ + done #append a .o suffix for nvcc + nvcc $(NVFLAGS) $(CPPFLAGS) -o $@ $(program_CXX_OBJS) $(program_C_OBJS) $(CUO_O_OBJECTS) + @ for cu_obj in $(CUO_O_OBJECTS); \ + do \ + mv $$cu_obj $${cu_obj%.*}; \ + done #remove the .o for make + +clean: + @- $(RM) $(PROGRAM_NAME) $(OBJECTS) *~ + +distclean: clean + diff --git a/GradientBenchmark/main.cpp b/GradientBenchmarkInitial/main.cpp similarity index 99% copy from GradientBenchmark/main.cpp copy to GradientBenchmarkInitial/main.cpp index 21b5186..efa0d66 100644 --- a/GradientBenchmark/main.cpp +++ b/GradientBenchmarkInitial/main.cpp @@ -1,279 +1,279 @@ /** * @file main.cpp * @Author Christoph Schaaefer, EPFL (christophernstrerne.schaefer@epfl.ch) * @date October 2016 * @brief Benchmark for gradhalo function */ #include <iostream> #include <string.h> #include <cuda_runtime.h> #include "structure.h" #include <math.h> #include <sys/time.h> #include <fstream> /** for both gradient and second derivatives **/ static struct point rotateCoordinateSystem(struct point P, double theta); /** gradient **/ struct point module_potentialDerivatives_totalGradient(const runmode_param *runmode, const struct point *pImage, const struct Potential *lens); static struct point grad_halo(const struct point *pImage, const struct Potential *lens); /** PIEMD **/ static complex piemd_1derivatives_ci05(double x, double y, double eps, double rc); /** Potential **/ void module_readParameters_calculatePotentialparameter(Potential *lens); int main() { //Constant int small(10); int medium(100); int big(1000); //Variable creation struct timeval t1, t2, t3, t4; runmode_param runmodesmall; runmode_param runmodemedium; runmode_param runmodebig; point image; Potential *ilens; Potential lens[big]; //Initialisation runmodesmall.nhalos = small; runmodemedium.nhalos = medium; runmodebig.nhalos = big; image.x = image.y = 2; for (int i = 0; i <big; ++i){ ilens = &lens[i]; ilens->position.x = ilens->position.y = 0.; ilens->type = 8; ilens->ellipticity = 0.11; ilens->ellipticity_potential = 0.; ilens->ellipticity_angle = 0.; ilens->rcut = 5.; ilens->rcore = 1; ilens->weight = 0; ilens->rscale = 0; ilens->exponent = 0; ilens->alpha = 0.; ilens->einasto_kappacritic = 0; ilens->z = 0.4; module_readParameters_calculatePotentialparameter(ilens); } gettimeofday(&t1, 0); module_potentialDerivatives_totalGradient(&runmodesmall,&image, lens); gettimeofday(&t2, 0); module_potentialDerivatives_totalGradient(&runmodemedium,&image, lens); gettimeofday(&t3, 0); module_potentialDerivatives_totalGradient(&runmodebig,&image, lens); gettimeofday(&t4, 0); double time1 = (1000000.0*(t2.tv_sec-t1.tv_sec) + t2.tv_usec-t1.tv_usec)/1000000.0; double time2 = (1000000.0*(t3.tv_sec-t2.tv_sec) + t3.tv_usec-t2.tv_usec)/1000000.0; double time3 = (1000000.0*(t4.tv_sec-t3.tv_sec) + t4.tv_usec-t3.tv_usec)/1000000.0; std::cout << "Benchmark for Gradient Calculation "<< std::endl; std::cout << "Sample size " << small << ": " << time1 << std::endl; std::cout << "Sample size " << medium << ": " << time2 << std::endl; std::cout << "Sample size " << big << ": " << time3 << std::endl; std::ofstream myfile; -myfile.open ("BenchmarkGrad.txt"); +myfile.open ("BenchmarkGradSoA.txt"); myfile << "Benchmark for Gradient Calculation "<< std::endl; myfile << "Sample size " << small << ": " << time1 << std::endl; myfile << "Sample size " << medium << ": " << time2 << std::endl; myfile << "Sample size " << big << ": " << time3 << std::endl; myfile.close(); } struct point module_potentialDerivatives_totalGradient(const runmode_param *runmode, const struct point *pImage, const struct Potential *lens) { struct point grad, clumpgrad; grad.x=0; grad.y=0; for(int i=0; i<runmode->nhalos; i++){ clumpgrad=grad_halo(pImage,&lens[i]); //compute gradient for each clump separately if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y){ //nan check grad.x+=clumpgrad.x; grad.y+=clumpgrad.y; } // add the gradients } return(grad); } /**@brief Return the gradient of the projected lens potential for one clump * !!! You have to multiply by dlsds to obtain the true gradient * for the expressions, see the papers : JP Kneib & P Natarajan, Cluster Lenses, The Astronomy and Astrophysics Review (2011) for 1 and 2 * and JP Kneib PhD (1993) for 3 * * @param pImage point where the result is computed in the lens plane * @param lens mass distribution */ static struct point grad_halo(const struct point *pImage, const struct Potential *lens) { struct point true_coord, true_coord_rotation, result; double R, angular_deviation; complex zis; result.x = result.y = 0.; /*positionning at the potential center*/ true_coord.x = pImage->x - lens->position.x; // Change the origin of the coordinate system to the center of the clump true_coord.y = pImage->y - lens->position.y; switch (lens->type) { case(5): /*Elliptical Isothermal Sphere*/ /*rotation of the coordiante axes to match the potential axes*/ true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle); R=sqrt(true_coord_rotation.x*true_coord_rotation.x*(1-lens->ellipticity/3.)+true_coord_rotation.y*true_coord_rotation.y*(1+lens->ellipticity/3.)); //ellippot = ellipmass/3 result.x=(1-lens->ellipticity/3.)*lens->b0*true_coord_rotation.x/(R); result.y=(1+lens->ellipticity/3.)*lens->b0*true_coord_rotation.y/(R); break; case(8): /* PIEMD */ /*rotation of the coordiante axes to match the potential axes*/ true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle); /*Doing something....*/ zis = piemd_1derivatives_ci05(true_coord_rotation.x, true_coord_rotation.y, lens->ellipticity_potential, lens->rcore); result.x=lens->b0 * zis.re; result.y=lens->b0 * zis.im; break; default: std::cout << "ERROR: Grad 1 profil type of clump "<< lens->name << " unknown : "<< lens->type << std::endl; break; }; return result; } /**** usefull functions for PIEMD profile : see old lenstool ****/ /** I*w,v=0.5 Kassiola & Kovner, 1993 PIEMD, paragraph 4.1 * * Global variables used : * - none */ static complex piemd_1derivatives_ci05(double x, double y, double eps, double rc) { double sqe, cx1, cxro, cyro, rem2; complex zci, znum, zden, zis, zres; double norm; sqe = sqrt(eps); cx1 = (1. - eps) / (1. + eps); cxro = (1. + eps) * (1. + eps); cyro = (1. - eps) * (1. - eps); rem2 = x * x / cxro + y * y / cyro; /*zci=cpx(0.,-0.5*(1.-eps*eps)/sqe); znum=cpx(cx1*x,(2.*sqe*sqrt(rc*rc+rem2)-y/cx1)); zden=cpx(x,(2.*rc*sqe-y)); zis=pcpx(zci,lncpx(dcpx(znum,zden))); zres=pcpxflt(zis,b0);*/ // --> optimized code zci.re = 0; zci.im = -0.5 * (1. - eps * eps) / sqe; znum.re = cx1 * x; znum.im = 2.*sqe * sqrt(rc * rc + rem2) - y / cx1; zden.re = x; zden.im = 2.*rc * sqe - y; norm = zden.re * zden.re + zden.im * zden.im; // zis = znum/zden zis.re = (znum.re * zden.re + znum.im * zden.im) / norm; zis.im = (znum.im * zden.re - znum.re * zden.im) / norm; norm = zis.re; zis.re = log(sqrt(norm * norm + zis.im * zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) zis.im = atan2(zis.im, norm); // norm = zis.re; zres.re = zci.re * zis.re - zci.im * zis.im; // Re( zci*ln(zis) ) zres.im = zci.im * zis.re + zis.im * zci.re; // Im( zci*ln(zis) ) //zres.re = zis.re*b0; //zres.im = zis.im*b0; return(zres); } /// Useful functions // changes the coordinates of point P into a new basis (rotation of angle theta) // y' y x' // * | / // * | / theta // * | / // *|--------->x static struct point rotateCoordinateSystem(struct point P, double theta) { struct point Q; Q.x = P.x*cos(theta) + P.y*sin(theta); Q.y = P.y*cos(theta) - P.x*sin(theta); return(Q); } /** @brief This module function calculates profile depended information like the impactparameter b0 and the potential ellipticity epot * * @param lens: mass distribution for which to calculate parameters */ void module_readParameters_calculatePotentialparameter(Potential *lens){ switch (lens->type) { case(5): /*Elliptical Isothermal Sphere*/ //impact parameter b0 lens->b0 = 4* pi_c2 * lens->vdisp * lens->vdisp ; //ellipticity_potential lens->ellipticity_potential = lens->ellipticity/3 ; break; case(8): /* PIEMD */ //impact parameter b0 lens->b0 = 6.*pi_c2 * lens->vdisp * lens->vdisp; //ellipticity_parameter if ( lens->ellipticity == 0. && lens->ellipticity_potential != 0. ){ // emass is (a2-b2)/(a2+b2) lens->ellipticity = 2.*lens->ellipticity_potential / (1. + lens->ellipticity_potential * lens->ellipticity_potential); //printf("1 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } else if ( lens->ellipticity == 0. && lens->ellipticity_potential == 0. ){ lens->ellipticity_potential = 0.00001; //printf("2 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } else{ // epot is (a-b)/(a+b) lens->ellipticity_potential = (1. - sqrt(1 - lens->ellipticity * lens->ellipticity)) / lens->ellipticity; //printf("3 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } break; default: std::cout << "ERROR: LENSPARA profil type of clump "<< lens->name << " unknown : "<< lens->type << std::endl; //printf( "ERROR: LENSPARA profil type of clump %s unknown : %d\n",lens->name, lens->type); break; }; } diff --git a/GradientBenchmark/structure.h b/GradientBenchmarkInitial/structure.h similarity index 100% copy from GradientBenchmark/structure.h copy to GradientBenchmarkInitial/structure.h