diff --git a/Benchmarks/Scripts/GridGradientBenchmark.sh b/Benchmarks/Scripts/GridGradientBenchmark.sh index 7f0c42b..4c9041b 100755 --- a/Benchmarks/Scripts/GridGradientBenchmark.sh +++ b/Benchmarks/Scripts/GridGradientBenchmark.sh @@ -1,30 +1,30 @@ #!/bin/bash echo "Starting Mapping Test" #if recompiling lenstool is needed #cd ../Libs/lenstool-6.8.1/ #make #cd ../../lenstool-hpc cd ../../src/ make clean make cd ../Benchmarks/GridGradientBenchmark -make -f Makefile.GPU clean -make -f Makefile.GPU -rm -r tmp +make -f Makefile.intel clean +make -f Makefile.intel +//rm -r tmp #./Bayesmap_GPU m1931.par T #./GridGradient_GPU ../ConfigFiles/TestResultParameter.par T ./GridGradient_GPU ../ConfigFiles/90Pot81.par T #./ChiBenchmark_GPU ../ConfigFiles/MarkusBenchmark.par T #./ChiBenchmark_GPU ../ConfigFiles/MarkusBenchmark1SIS.par T #cd ../Benchmarks/GradientBenchmark #make -f Makefile clean #make -f MakefileDouble #./GradientBenchmark echo "Finish Double test" diff --git a/src/gradient.cpp b/src/gradient.cpp index f2cd63f..6a6a3e3 100644 --- a/src/gradient.cpp +++ b/src/gradient.cpp @@ -1,964 +1,964 @@ #include #include #include //#include #include #include #include #include #include /* #ifdef __AVX__ #include "simd_math_avx.h" #endif #ifdef __AVX512F__ #include "simd_math_avx512f.h" #endif */ #include "structure_hpc.hpp" #include "gradient.hpp" #include "utils.hpp" //#include "iacaMarks.h" // // // /**@brief Return the gradient of the projected lens potential for one clump * !!! You have to multiply by dlsds to obtain the true gradient * for the expressions, see the papers : JP Kneib & P Natarajan, Cluster Lenses, The Astronomy and Astrophysics Review (2011) for 1 and 2 * and JP Kneib PhD (1993) for 3 * * @param pImage point where the result is computed in the lens plane * @param lens mass distribution */ // /// Useful functions // complex piemd_1derivatives_ci05(type_t x, type_t y, type_t eps, type_t rc) { type_t sqe, cx1, cxro, cyro, rem2; complex zci, znum, zden, zis, zres; type_t norm; // //std::cout << "piemd_lderivatives" << std::endl; sqe = sqrt(eps); cx1 = ((type_t) 1. - eps) / ((type_t) 1. + eps); cxro = ((type_t) 1. + eps) * ((type_t) 1. + eps); cyro = ((type_t) 1. - eps) * ((type_t) 1. - eps); // rem2 = x * x / cxro + y * y / cyro; /*zci=cpx(0.,-0.5*(1.-eps*eps)/sqe); znum=cpx(cx1*x,(2.*sqe*sqrt(rc*rc+rem2)-y/cx1)); zden=cpx(x,(2.*rc*sqe-y)); zis=pcpx(zci,lncpx(dcpx(znum,zden))); zres=pcpxflt(zis,b0);*/ // --> optimized code zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; znum.re = cx1 * x; znum.im = (type_t) 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; zden.re = x; zden.im = (type_t) 2.*rc * sqe - y; norm = zden.re * zden.re + zden.im * zden.im; // zis = znum/zden zis.re = (znum.re * zden.re + znum.im * zden.im) / norm; zis.im = (znum.im * zden.re - znum.re * zden.im) / norm; norm = zis.re; zis.re = log(sqrt(norm * norm + zis.im * zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) zis.im = atan2(zis.im, norm); // norm = zis.re; zres.re = zci.re * zis.re - zci.im * zis.im; // Re( zci*ln(zis) ) zres.im = zci.im * zis.re + zis.im * zci.re; // Im( zci*ln(zis) ) // //zres.re = zis.re*b0; //zres.im = zis.im*b0; // return(zres); } // //// changes the coordinates of point P into a new basis (rotation of angle theta) //// y' y x' //// * | / //// * | / theta //// * | / //// *|--------->x /* inline struct point rotateCoordinateSystem(struct point P, double theta) { struct point Q; Q.x = P.x*cos(theta) + P.y*sin(theta); Q.y = P.y*cos(theta) - P.x*sin(theta); return(Q); } */ // // struct point grad_halo(const struct point *pImage, const struct Potential *lens) { struct point true_coord, true_coord_rot, result; type_t X, Y, R, angular_deviation, u; complex zis; // result.x = result.y = (type_t) 0.; // /*positionning at the potential center*/ // Change the origin of the coordinate system to the center of the clump true_coord.x = pImage->x - lens->position.x; true_coord.y = pImage->y - lens->position.y; //printf("x, y = %f, %f\n", lens->position.x, lens->position.y); // true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle); // switch (lens->type) { case(5): /*Elliptical Isothermal Sphere*/ /*rotation of the coordiante axes to match the potential axes*/ //true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle); // R=sqrt(true_coord_rot.x*true_coord_rot.x*((type_t) 1. - lens->ellipticity_potential) + true_coord_rot.y*true_coord_rot.y*((type_t) 1. + lens->ellipticity_potential)); //ellippot = ellipmass/3 result.x = ((type_t) 1. - lens->ellipticity_potential)*lens->b0*true_coord_rot.x/(R); result.y = ((type_t) 1. + lens->ellipticity_potential)*lens->b0*true_coord_rot.y/(R); break; case(8): /* PIEMD */ /*rotation of the coordiante axes to match the potential axes*/ /*Doing something....*/ complex zis = piemd_1derivatives_ci05(true_coord_rot.x, true_coord_rot.y, lens->ellipticity_potential, lens->rcore); // result.x = lens->b0*zis.re; result.y = lens->b0*zis.im; break; case(81): //PIEMD Kassiola & Kovner,1993 I0.5c-I0.5cut type_t t05; if ( lens->ellipticity_potential > (type_t) 2E-4 ) { //printf("1 "); t05 = lens->b0*lens->rcut/(lens->rcut - lens->rcore); complex zis = piemd_1derivatives_ci05(true_coord_rot.x, true_coord_rot.y, lens->ellipticity_potential, lens->rcore); complex zis_cut = piemd_1derivatives_ci05(true_coord_rot.x, true_coord_rot.y, lens->ellipticity_potential, lens->rcut); result.x = t05 * (zis.re - zis_cut.re); result.y = t05 * (zis.im - zis_cut.im); //printf(" g = %f %f\n", result.x, result.y); } else if (( u = true_coord_rot.x*true_coord_rot.x + true_coord_rot.y*true_coord_rot.y) > (type_t) 0. ) { //printf("2 "); // Circular dPIE Elliasdottir 2007 Eq A23 slighly modified for t05 X = lens->rcore; Y = lens->rcut; t05 = sqrt(u + X * X) - X - sqrt(u + Y * Y) + Y; // Faster and equiv to Elliasdottir (see Golse PhD) t05 *= lens->b0 * Y / (Y - X) / u; // 1/u because t05/sqrt(u) and normalised Q/sqrt(u) result.x = t05*true_coord_rot.x; result.y = t05*true_coord_rot.y; } else { //printf("3 "); result.x = (type_t) 0.; result.y = (type_t) 0.; } break; default: std::cout << "ERROR: Grad 1 profil type of clump "<< lens->name << " unknown : "<< lens->type << std::endl; break; }; result = rotateCoordinateSystem(result, -lens->ellipticity_angle); //printf(" rot grad = %.15f %.15f\n", result.x, result.y); return result; } // // // struct point module_potentialDerivatives_totalGradient(const int nhalos, const struct point* pImage, const struct Potential* lens) { struct point grad, clumpgrad; // grad.x = (type_t) 0.; grad.y = (type_t) 0.; // for(int i = 0; i < nhalos; i++) { clumpgrad = grad_halo(pImage, &lens[i]); //compute gradient for each clump separately //std::cout << clumpgrad.x << " " << clumpgrad.y << std::endl; //nan check //if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y) { // add the gradients grad.x += clumpgrad.x; grad.y += clumpgrad.y; } } // return(grad); } // // SOA versions, vectorizable // struct point module_potentialDerivatives_totalGradient_5_SOA(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA begins"); //printf("# module_potentialDerivatives_totalGradient_SIS_SOA begins\n"); // struct point grad, result; grad.x = (type_t) 0.; grad.y = (type_t) 0.; for(int i = shalos; i < shalos + nhalos; i++) { // struct point true_coord, true_coord_rotation; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); type_t ell_pot = lens->ellipticity_potential[i]; // type_t R = sqrt(true_coord_rotation.x*true_coord_rotation.x*((type_t) 1. - ell_pot) + true_coord_rotation.y*true_coord_rotation.y*((type_t) 1. + ell_pot)); // result.x = ((type_t) 1. - ell_pot)*lens->b0[i]*true_coord_rotation.x/R; result.y = ((type_t) 1. + ell_pot)*lens->b0[i]*true_coord_rotation.y/R; // result = rotateCoordinateSystem(result, -lens->ellipticity_angle[i]); // grad.x += result.x; grad.y += result.y; } return grad; } // // // struct point module_potentialDerivatives_totalGradient_5_SOA_v2(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins"); //printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n"); // struct point grad, result; grad.x = (type_t) 0.; grad.y = (type_t) 0.; for(int i = shalos; i < shalos + nhalos; i++) { struct point true_coord, true_coord_rotation; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // //true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); type_t b0 = lens->b0[i]; // type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; // type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t ell_pot = lens->ellipticity_potential[i]; // type_t val = x*x*((type_t) 1. - ell_pot) + y*y*((type_t) 1. + ell_pot); type_t R = 1./sqrtf(val); R = R*(1.5 - 0.5*val*R*R); result.x = ((type_t) 1. - ell_pot)*b0*x*R; result.y = ((type_t) 1. + ell_pot)*b0*y*R; // grad.x += result.x*cose - result.y*sine; grad.y += result.y*cose + result.x*sine; //grad.x = x/R; //grad.y = y/R; } return grad; } struct point module_potentialDerivatives_totalGradient_5_SOA_print(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos, int index) { asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins"); //printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n"); // struct point grad, result; std::ofstream myfile; grad.x = (type_t) 0.; grad.y = (type_t) 0.; #ifdef _double std::string name = "Double_"; #else std::string name = "Float_"; #endif for(int i = shalos; i < shalos + nhalos; i++) { // struct point true_coord, true_coord_rotation; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; //// /* myfile.open (name + "pImage->x_1.txt", std::ios_base::app); myfile << index << " " << pImage->x << std::setprecision(7) << " " << std::endl; myfile.close(); - //* + // myfile.open (name + "pImage->y_1.txt", std::ios_base::app); myfile << index << " " << pImage->y << std::setprecision(7) << " " << std::endl; myfile.close(); // - /* + myfile.open (name + "true_coord.x_2.txt", std::ios_base::app); myfile << index << " " << true_coord.x << std::setprecision(7) << " " << std::endl; myfile.close(); // myfile.open (name + "true_coord.y_2.txt", std::ios_base::app); myfile << index << " " << true_coord.y << std::setprecision(7) << " " << std::endl; myfile.close(); */// type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; //// myfile.open (name + "lens->anglecos[i]_3.txt", std::ios_base::app); myfile << index << " " << lens->anglecos[i] << std::setprecision(7) << " " << std::endl; myfile.close(); // myfile.open (name + "lens->anglesin[i]_3.txt", std::ios_base::app); myfile << index << " " << lens->anglesin[i] << std::setprecision(7) << " " << std::endl; myfile.close(); //// type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; //// /* myfile.open (name + "x_4.txt", std::ios_base::app); myfile << index << " " << x << std::setprecision(7) << " " << std::endl; myfile.close(); - /* + / myfile.open (name + "y_4.txt", std::ios_base::app); myfile << index << " " << y << std::setprecision(7) << " " << std::endl; myfile.close(); *//// type_t ell_pot = lens->ellipticity_potential[i]; // type_t R = sqrt(x*x*(1 - ell_pot) + y*y*(1 + ell_pot)); // result.x = (1 - ell_pot)*lens->b0[i]*x/R; result.y = (1 + ell_pot)*lens->b0[i]*y/R; //// /* myfile.open (name + "ell_pot_5.txt", std::ios_base::app); myfile << index << " " << ell_pot << std::setprecision(7) << " " << std::endl; myfile.close(); // myfile.open (name + "R_5.txt", std::ios_base::app); myfile << index << " " << R << std::setprecision(7) << " " << std::endl; myfile.close(); myfile.open (name + "x_6.txt", std::ios_base::app); myfile << index << " " << x << std::setprecision(7) << " " << std::endl; myfile.close(); - /* + myfile.open (name + "y_6.txt", std::ios_base::app); myfile << index << " " << y << std::setprecision(7) << " " << std::endl; myfile.close(); */// grad.x += result.x*cose - result.y*sine; grad.y += result.y*cose + result.x*sine; //// /* myfile.open (name + "grad.x_7.txt", std::ios_base::app); myfile << index << " " << grad.x << std::setprecision(7) << " " << std::endl; myfile.close(); - /* + myfile.open (name + "grad.y_7.txt", std::ios_base::app); myfile << index << " " << grad.y << std::setprecision(7) << " " << std::endl; myfile.close(); *//// } return grad; } // // // struct point module_potentialDerivatives_totalGradient_5_SOA_v2_novec(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA begins"); // struct point grad, result; grad.x = (type_t) 0.; grad.y = (type_t) 0.; -#pragma novec +#pragma novector for(int i = shalos; i < shalos + nhalos; i++) { // struct point true_coord, true_coord_rotation; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // //true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; // type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; //: type_t R = sqrt(x*x*((type_t) 1. - lens->ellipticity[i]/(type_t) 3.) + y*y*((type_t) 1. + lens->ellipticity[i]/(type_t) 3.)); result.x = ((type_t) 1. - lens->ellipticity[i]/(type_t) 3.)*lens->b0[i]*x/R; result.y = ((type_t) 1. + lens->ellipticity[i]/(type_t) 3.)*lens->b0[i]*y/R; // grad.x += result.x*cose - result.y*sine; grad.y += result.y*cose + result.x*sine; } return grad; } // // // struct point module_potentialDerivatives_totalGradient_8_SOA(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_SOA begins"); // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // struct point grad, clumpgrad; grad.x = (type_t) 0.; grad.y = (type_t) 0.; // for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; //type_t R, angular_deviation; complex zis; // //result.x = result.y = 0.; // //@@printf("image_x = %f image_y = %f\n", pImage->x, pImage->y); true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; //printf("x = %f y = %f\n", true_coord.x, true_coord.y); /*positionning at the potential center*/ // Change the origin of the coordinate system to the center of the clump true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); // type_t x = true_coord_rot.x; type_t y = true_coord_rot.y; //@@printf("x = %f y = %f\n", x, y); type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; // //std::cout << "piemd_lderivatives" << std::endl; // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; //@@printf("zci = %f %f\n", zci.re, zci.im); // znum.re = cx1*x; znum.im = (type_t) 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; // zden.re = x; zden.im = (type_t) 2.*rc*sqe - y; norm = (zden.re*zden.re + zden.im*zden.im); // zis = znum/zden //@@printf("norm = %f\n", norm); // zis.re = (znum.re*zden.re + znum.im*zden.im)/norm; zis.im = (znum.im*zden.re - znum.re*zden.im)/norm; //@@printf("zis = %f %f\n", zis.re, zis.im); norm = zis.re; zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) zis.im = atan2(zis.im, norm); //@@printf("y,x = %f %f\n", zis.im, norm); // norm = zis.re; zres.re = zci.re*zis.re - zci.im*zis.im; // Re( zci*ln(zis) ) zres.im = zci.im*zis.re + zis.im*zci.re; // Im( zci*ln(zis) ) // //@@printf("zres: %f %f\n", zres.re, zres.im); // zis.re = zres.re; zis.im = zres.im; // //zres.re = zis.re*b0; //zres.im = zis.im*b0; // rotation clumpgrad.x = zis.re; clumpgrad.y = zis.im; clumpgrad = rotateCoordinateSystem(clumpgrad, -lens->ellipticity_angle[i]); // clumpgrad.x = lens->b0[i]*clumpgrad.x; clumpgrad.y = lens->b0[i]*clumpgrad.y; // //clumpgrad.x = lens->b0[i]*zis.re; //clumpgrad.y = lens->b0[i]*zis.im; //nan check //if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y) //{ // add the gradients grad.x += clumpgrad.x; grad.y += clumpgrad.y; //@@printf("grad: %f %f\n", grad.x, grad.y); //@@std::cout << "grad.x = " << grad.x << " grad.y = " << grad.y << std::endl; //} } //IACA_END; // return(grad); } // // // struct point module_potentialDerivatives_totalGradient_8_SOA_v2(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_8_SOA_v2 begins"); //std::cout << "# module_potentialDerivatives_totalGradient_8_SOA_v2 begins" << std::endl; // // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // struct point grad, clumpgrad; grad.x = 0; grad.y = 0; //printf("%d %d\n", shalos, nhalos); // for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; complex zis; type_t b0 = lens->b0[i]; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // //true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); // type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; // type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; // KERNEL(rc, zres) // grad.x += b0*(zres.re*cose - zres.im*sine); grad.y += b0*(zres.im*cose + zres.re*sine); // } //IACA_END; // return(grad); } // // // struct point module_potentialDerivatives_totalGradient_8_SOA_v2_novec(const struct point *pImage, const struct Potential_SOA *lens , int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_8_SOA_v2 begins"); //std::cout << "# module_potentialDerivatives_totalGradient_8_SOA_v2 begins" << std::endl; // // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // struct point grad, clumpgrad; grad.x = (type_t) 0.; grad.y = (type_t) 0.; //printf("%d %d\n", shalos, nhalos); // #pragma novector for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; complex zis; type_t b0 = lens->b0[i]; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // //true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); // type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; // type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; // KERNEL(rc, zres) // grad.x += b0*(zres.re*cose - zres.im*sine); grad.y += b0*(zres.im*cose + zres.re*sine); // } //IACA_END; // return(grad); } // // // struct point module_potentialDerivatives_totalGradient_81_SOA(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins"); //std::cout << "# module_potentialDerivatives_totalGradient_SOA begins" << std::endl; // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // struct point grad, clumpgrad; grad.x = (type_t) 0.; grad.y = (type_t) 0.; for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; //type_t R, angular_deviation; complex zis; // //result.x = result.y = 0.; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; /*positionning at the potential center*/ // Change the origin of the coordinate system to the center of the clump true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); // type_t x = true_coord_rot.x; type_t y = true_coord_rot.y; type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; type_t rcut = lens->rcut[i]; type_t b0 = lens->b0[i]; type_t t05 = b0*rcut/(rcut - rc); // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres_rc, zres_rcut; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; // step 1 { KERNEL(rc, zres_rc) } // step 2 { KERNEL(rcut, zres_rcut) } zis.re = t05*(zres_rc.re - zres_rcut.re); zis.im = t05*(zres_rc.im - zres_rcut.im); // rotation clumpgrad.x = zis.re; clumpgrad.y = zis.im; clumpgrad = rotateCoordinateSystem(clumpgrad, -lens->ellipticity_angle[i]); // grad.x += clumpgrad.x; grad.y += clumpgrad.y; //} } //IACA_END; // return(grad); } // // // struct point module_potentialDerivatives_totalGradient_81_SOA_v2(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins"); //std::cout << "# module_potentialDerivatives_totalGradient_81_SOA begins" << std::endl; // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // struct point grad, clumpgrad; grad.x = (type_t) 0.; grad.y = (type_t) 0.; for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; //type_t R, angular_deviation; complex zis; // //result.x = result.y = 0.; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; /*positionning at the potential center*/ // Change the origin of the coordinate system to the center of the clump type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; type_t rcut = lens->rcut[i]; type_t b0 = lens->b0[i]; type_t t05 = b0*rcut/(rcut - rc); // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres_rc, zres_rcut; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; // // step 1 { KERNEL(rc, zres_rc) } // step 2 { KERNEL(rcut, zres_rcut) } zis.re = t05*(zres_rc.re - zres_rcut.re); zis.im = t05*(zres_rc.im - zres_rcut.im); // rotation grad.x += (zis.re*cose - zis.im*sine); grad.y += (zis.im*cose + zis.re*sine); // } // return(grad); } // // // struct point module_potentialDerivatives_totalGradient_81_SOA_v2_novec(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins"); //std::cout << "# module_potentialDerivatives_totalGradient_81_SOA begins" << std::endl; // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 struct point grad, clumpgrad; grad.x = 0.; grad.y = 0.; #pragma novector for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; //type_t R, angular_deviation; complex zis; // //result.x = result.y = 0.; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; /*positionning at the potential center*/ // Change the origin of the coordinate system to the center of the clump type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; type_t rcut = lens->rcut[i]; type_t b0 = lens->b0[i]; type_t t05 = b0*rcut/(rcut - rc); // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres_rc, zres_rcut; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; // // step 1 { KERNEL(rc, zres_rc) } // step 2 { KERNEL(rcut, zres_rcut) } zis.re = t05*(zres_rc.re - zres_rcut.re); zis.im = t05*(zres_rc.im - zres_rcut.im); // rotation grad.x += (zis.re*cose - zis.im*sine); grad.y += (zis.im*cose + zis.re*sine); // } // return(grad); } // // // typedef struct point (*halo_func_t) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos); halo_func_t halo_func[100] = { 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_5_SOA_v2, 0, 0, module_potentialDerivatives_totalGradient_8_SOA_v2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_81_SOA_v2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; // // // struct point module_potentialDerivatives_totalGradient_SOA(const struct point *pImage, const struct Potential_SOA *lens, int nhalos) { struct point grad, clumpgrad; // grad.x = clumpgrad.x = (type_t) 0.; grad.y = clumpgrad.y = (type_t) 0.; // int shalos = 0; while (shalos < nhalos) { int lens_type = lens->type[shalos]; int count = 1; while (lens->type[shalos + count] == lens_type and shalos + count < nhalos){ //int i = count; //std::cerr << lens->position_x[i] << lens->position_y[i] << lens->anglecos[i]<< lens->anglesin[i]<< lens->ellipticity_potential[i] << lens->rcore[i] << lens->rcut[i] << lens->b0[i] << std::endl; //std::cerr << "lens->type[shalos + count] = " << lens->type[shalos + count] << " " << lens_type << " " << lens_type << " " << " count = " << count << std::endl; count++; } // clumpgrad = (*halo_func[lens_type])(pImage, lens, shalos, count); // //std::cerr << lens->position_x[i] << lens->position_y[i] << lens->anglecos[i]<< lens->anglesin[i]<< lens->ellipticity_potential[i] << lens->rcore[i] << lens->rcut[i] << lens->b0[i] << std::endl; //std::cerr << "type = " << lens_type << " " << count << " " << nhalos << " " << " grad.x = " << clumpgrad.x << " grad.y = " << clumpgrad.y << std::endl; grad.x += clumpgrad.x; grad.y += clumpgrad.y; shalos += count; } return(grad); } // typedef struct point (*halo_func_t_novec) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos); // halo_func_t_novec halo_func_novec[100] = { 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_5_SOA_v2_novec, 0, 0, module_potentialDerivatives_totalGradient_8_SOA_v2_novec, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_81_SOA_v2_novec, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; // // // struct point module_potentialDerivatives_totalGradient_SOA_novec(const struct point *pImage, const struct Potential_SOA *lens, int nhalos) { struct point grad, clumpgrad; // grad.x = clumpgrad.x = 0; grad.y = clumpgrad.y = 0; // int shalos = 0; // //module_potentialDerivatives_totalGradient_81_SOA(pImage, lens, 0, nhalos); //return; /* int* p_type = &(lens->type)[0]; int* lens_type = (int*) malloc(nhalos*sizeof(int)); memcpy(lens_type, &(lens->type)[0], nhalos*sizeof(int)); */ //quicksort(lens_type, nhalos); // while (shalos < nhalos) { int lens_type = lens->type[shalos]; int count = 1; while (lens->type[shalos + count] == lens_type) count++; //std::cerr << "type = " << lens_type << " " << count << " " << shalos << std::endl; // clumpgrad = (*halo_func_novec[lens_type])(pImage, lens, shalos, count); // grad.x += clumpgrad.x; grad.y += clumpgrad.y; shalos += count; } return(grad); } diff --git a/src/gradient2_GPU.cu b/src/gradient2_GPU.cu index 38a3147..2e5a8e0 100644 --- a/src/gradient2_GPU.cu +++ b/src/gradient2_GPU.cu @@ -1,340 +1,340 @@ #include #include "grid_gradient2_GPU.cuh" #include "gradient2_GPU.cuh" #include "structure_hpc.hpp" #define BLOCK_SIZE_X 8 #define BLOCK_SIZE_Y 16 //#define ROT #define _SHARED_MEM #ifdef _SHARED_MEM #define SHARED __shared__ #warning "shared memory" extern __shared__ type_t shared[]; #else #define SHARED #endif #define Nx 1 #define Ny 0 extern "C" { double myseconds(); } __device__ matrix mdci05_gpu(type_t x, type_t y, type_t eps, type_t rc, type_t b0) { matrix res; type_t ci, sqe, cx1, cxro, cyro, wrem; type_t didyre, didyim, didxre;// didxim; type_t cx1inv, den1, num2, den2; sqe = sqrt(eps); cx1 = (1. - eps) / (1. + eps); cx1inv = 1. / cx1; cxro = (1. + eps) * (1. + eps); /* rem^2=x^2/(1+e^2) + y^2/(1-e^2) Eq 2.3.6*/ cyro = (1. - eps) * (1. - eps); ci = 0.5 * (1. - eps * eps) / sqe; wrem = sqrt(rc * rc + x * x / cxro + y * y / cyro); /*wrem^2=w^2+rem^2 with w core radius*/ den1 = 2.*sqe * wrem - y * cx1inv; den1 = cx1 * cx1 * x * x + den1 * den1; num2 = 2.*rc * sqe - y; den2 = x * x + num2 * num2; didxre = ci * ( cx1 * (2.*sqe * x * x / cxro / wrem - 2.*sqe * wrem + y * cx1inv) / den1 + num2 / den2 ); didyre = ci * ( (2 * sqe * x * y * cx1 / cyro / wrem - x) / den1 + x / den2 ); didyim = ci * ( (2 * sqe * wrem * cx1inv - y * cx1inv * cx1inv - 4 * eps * y / cyro + 2 * sqe * y * y / cyro / wrem * cx1inv) / den1 - num2 / den2 ); //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printf("didxre %f didyre %f didyim %f ",didxre,didyre,didyim); res.a = b0 * didxre; res.b = res.d = b0 * didyre; //(didyre+didxim)/2.; res.c = b0 * didyim; return(res); } __device__ void printmat_gpu(matrix A){ printf("A: %lf, B: %lf, C:%lf, D:%lf \n",A.a,A.b,A.c,A.d); } __device__ matrix module_potentialDerivatives_totalGradient2_81_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos){ //asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins"); //std::cout << "# module_potentialDerivatives_totalGradient_81_SOA begins" << std::endl; // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // - type_t t05, vala,valb,valc,vald; + type_t t05; struct matrix grad2, clump, clumpcore, clumpcut; grad2.a = 0; grad2.b = 0; grad2.c = 0; grad2.d = 0; #if 1 for(int i = shalos; i < shalos + nhalos; i++) { - struct point true_coord, true_coord_rotation; + struct point true_coord; //True coord true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // //std::cerr << "start" << std::endl; //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printf("start\n"); //Rotation type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; // type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // 81 comput t05 = lens->rcut[i] / (lens->rcut[i] - lens->rcore[i]); //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0)printf("t05 %f rcut: %f, rcore: %f, b0:%f \n",t05, lens->rcut[i],lens->rcore[i], lens->b0[i]); clumpcore = mdci05_gpu(x, y, lens->ellipticity_potential[i], lens->rcore[i], lens->b0[i]); //////////////////////////// #if 0 //matrix res; type_t rc = lens->rcore[i]; type_t eps =lens->ellipticity_potential[i]; type_t ci, sqe, cx1, cxro, cyro, wrem; type_t didyre, didyim, didxre;// didxim; type_t cx1inv, den1, num2, den2; sqe = sqrt( lens->ellipticity_potential[i]); cx1 = (1. - eps) / (1. + eps); cx1inv = 1. / cx1; cxro = (1. + eps) * (1. + eps); /* rem^2=x^2/(1+e^2) + y^2/(1-e^2) Eq 2.3.6*/ cyro = (1. - eps) * (1. - eps); ci = 0.5 * (1. - eps * eps) / sqe; wrem = sqrt(rc * rc + x * x / cxro + y * y / cyro); /*wrem^2=w^2+rem^2 with w core radius*/ den1 = 2.*sqe * wrem - y * cx1inv; den1 = cx1 * cx1 * x * x + den1 * den1; num2 = 2.*rc * sqe - y; den2 = x * x + num2 * num2; didxre = ci * ( cx1 * (2.*sqe * x * x / cxro / wrem - 2.*sqe * wrem + y * cx1inv) / den1 + num2 / den2 ); if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printf("cyro %f ",cyro); if(cyro == 0) printf("I %d fucked up!",threadIdx.x); //didyre = ci * ( (2 * sqe * x * y * cx1 / wrem - x) + x ); didyre = ci * ( (2 * sqe * x * y * cx1 / wrem - x) / den1 + x / den2 ); didyre = didyre /cyro; //didyre = ci * ( (2 * sqe * x * y * cx1 / cyro / wrem - x) / den1 + x / den2 ); didyim = ci * ( (2 * sqe * wrem * cx1inv - y * cx1inv * cx1inv - 4 * eps * y / cyro + 2 * sqe * y * y / cyro / wrem * cx1inv) / den1 - num2 / den2 ); //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printf("didxre %f didyre %f didyim %f ",didxre,didyre,didyim); clumpcore.a = lens->b0[i] * didxre; clumpcore.b = clumpcore.d = lens->b0[i] * didyre; //(didyre+didxim)/2.; clumpcore.c = lens->b0[i] * didyim; #endif //////////////////////////////////////////////// clumpcut = mdci05_gpu(x, y, lens->ellipticity_potential[i], lens->rcut[i], lens->b0[i]); ///////////////////////////////////// #if 0 rc = lens->rcut[i]; sqe = sqrt( lens->ellipticity_potential[i]); cx1 = (1. - eps) / (1. + eps); cx1inv = 1. / cx1; cxro = (1. + eps) * (1. + eps); /* rem^2=x^2/(1+e^2) + y^2/(1-e^2) Eq 2.3.6*/ cyro = (1. - eps) * (1. - eps); ci = 0.5 * (1. - eps * eps) / sqe; wrem = sqrt(rc * rc + x * x / cxro + y * y / cyro); /*wrem^2=w^2+rem^2 with w core radius*/ den1 = 2.*sqe * wrem - y * cx1inv; den1 = cx1 * cx1 * x * x + den1 * den1; num2 = 2.*rc * sqe - y; den2 = x * x + num2 * num2; didxre = ci * ( cx1 * (2.*sqe * x * x / cxro / wrem - 2.*sqe * wrem + y * cx1inv) / den1 + num2 / den2 ); didyre = ci * ( (2 * sqe * x * y * cx1 / cyro / wrem - x) / den1 + x / den2 ); didyim = ci * ( (2 * sqe * wrem * cx1inv - y * cx1inv * cx1inv - 4 * eps * y / cyro + 2 * sqe * y * y / cyro / wrem * cx1inv) / den1 - num2 / den2 ); //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printf("didxre %f didyre %f didyim %f ",didxre,didyre,didyim); clumpcut.a = lens->b0[i] * didxre; clumpcut.b = clumpcut.d = lens->b0[i] * didyre; //(didyre+didxim)/2.; clumpcut.c = lens->b0[i] * didyim; #endif /////////// //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(clumpcore); //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(clumpcut); // #if 1 clumpcore.a = t05 * (clumpcore.a - clumpcut.a); clumpcore.b = t05 * (clumpcore.b - clumpcut.b); clumpcore.c = t05 * (clumpcore.c - clumpcut.c); clumpcore.d = t05 * (clumpcore.d - clumpcut.d); //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(clumpcore); //rotation matrix 1 clumpcut.a = clumpcore.a * cose + clumpcore.b * -sine; clumpcut.b = clumpcore.a * sine + clumpcore.b * cose; clumpcut.c = clumpcore.d * sine + clumpcore.c * cose; clumpcut.d = clumpcore.d * cose + clumpcore.c * -sine; //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(clumpcut); //rotation matrix 2 clump.a = cose * clumpcut.a + -sine * clumpcut.d; clump.b = cose * clumpcut.b + -sine * clumpcut.c; clump.c = sine * clumpcut.b + cose * clumpcut.c; clump.d = sine * clumpcut.a + cose * clumpcut.d; #endif //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(clump); //vala += clump.a; //valb += clump.b; //valc += clump.c; //vald += clump.d; grad2.a += clump.a; grad2.b += clump.b; grad2.c += clump.c; grad2.d += clump.d; //if(blockIdx.x*blockDim.x + threadIdx.x == 0 && blockIdx.y*blockDim.y + threadIdx.y == 0) printmat_gpu(grad2); } // //grad2.a = vala; //grad2.b = valb; //grad2.c = valc; //grad2.d = vald; #endif return(grad2); } #if 1 typedef struct matrix (*halo_func2_GPU_t) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos); __constant__ halo_func2_GPU_t halo_func2_GPU[100] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient2_81_SOA_GPU, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #endif __global__ void module_potentialDerivatives_totalGradient2_SOA_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct Potential_SOA *lens, const struct grid_param *frame, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart) { struct point image_point; struct matrix grad, clumpgrad; // int col = blockIdx.x*blockDim.x + threadIdx.x; int row = blockIdx.y*blockDim.y + threadIdx.y; //if(row == 0 && col == 0) printf("Start GPU \n"); // if ((row + jstart < nbgridcells_y) && (col + istart < nbgridcells_x)) { int index = row*nbgridcells_x + col; // Create temp grad variable to minimise writing to global memory grid_grad grad.a = 0; grad.b = 0; grad.c = 0; grad.d = 0; // image_point.x = frame->xmin + (col + istart)*dx; image_point.y = frame->ymin + (row + jstart)*dy; // int shalos = 0; //if(row == 0 && col == 0) printf("Start 2 GPU \n"); //if(row == 0 && col == 0) std::cout << std::endl;; while (shalos < nhalos) { int lens_type = lens->type[shalos]; int count = 1; while (lens->type[shalos + count] == lens_type and shalos + count < nhalos) count++; // //if(row == 0 && col == 0) printf("point = %p \n", halo_func2_GPU[lens_type] ); // //clumpgrad = (*halo_func2_GPU[lens_type])(&image_point, lens, shalos, count); clumpgrad = module_potentialDerivatives_totalGradient2_81_SOA_GPU(&image_point, lens, shalos, count); // grad.a += clumpgrad.a; grad.b += clumpgrad.b; grad.c += clumpgrad.c; grad.d += clumpgrad.d; shalos += count; } // Write to global memory grid_grad2_a[index] = grad.a; grid_grad2_b[index] = grad.b; grid_grad2_c[index] = grad.c; grid_grad2_d[index] = grad.d; } } __global__ void module_potentialDerivatives_totalGradient2_SOA_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int nhalos) { struct point image_point; struct matrix grad, clumpgrad; // int col = blockIdx.x*blockDim.x + threadIdx.x; int row = blockIdx.y*blockDim.y + threadIdx.y; // if ((row < nbgridcells) && (col < nbgridcells)) { // type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1); type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1); // int index = row*nbgridcells + col; // Create temp grad variable to minimise writing to global memory grid_grad grad.a = 0; grad.b = 0; grad.c = 0; grad.d = 0; // image_point.x = frame->xmin + col*dx; image_point.y = frame->ymin + row*dy; // int shalos = 0; while (shalos < nhalos) { int lens_type = lens->type[shalos]; int count = 1; while (lens->type[shalos + count] == lens_type and shalos + count < nhalos) count++; // clumpgrad = (*halo_func2_GPU[lens_type])(&image_point, lens, shalos, count); // //clumpgrad = module_potentialDerivatives_totalGradient2_81_SOA_GPU(&image_point, lens, shalos, count); grad.a += clumpgrad.a; grad.b += clumpgrad.b; grad.c += clumpgrad.c; grad.d += clumpgrad.d; shalos += count; } // Write to global memory grid_grad2_a[index] = grad.a; grid_grad2_b[index] = grad.b; grid_grad2_c[index] = grad.c; grid_grad2_d[index] = grad.d; //if ((row == 0) && (col == 9)) //printf("%f %f: %f %f\n", image_point.x, image_point.y, grid_grad_x[index], grid_grad_y[index]); } } diff --git a/src/gradient_GPU.cu b/src/gradient_GPU.cu index 4f0395d..6b769b5 100644 --- a/src/gradient_GPU.cu +++ b/src/gradient_GPU.cu @@ -1,1000 +1,999 @@ #include #include "grid_gradient_GPU.cuh" #include "gradient.hpp" #include "gradient_GPU.cuh" #include "gradient.hpp" #define BLOCK_SIZE_X 8 #define BLOCK_SIZE_Y 16 //#define ROT #define _SHARED_MEM #ifdef _SHARED_MEM #define SHARED __shared__ #warning "shared memory" extern __shared__ type_t shared[]; #else #define SHARED #endif #define Nx 1 #define Ny 0 extern "C" { double myseconds(); } /* void module_potentialDerivatives_totalGradient_SOA_CPU_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_cpu, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos); void module_potentialDerivatives_totalGradient_SOA_CPU_GPU_v2(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_cpu, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos); void calculate_cossin_values(type_t *theta_cos, type_t *theta_sin, type_t *angles, int nhalos ){ for(int i = 0 ; i < nhalos; i++) { theta_cos[i] = cos(angles[i]); theta_sin[i] = sin(angles[i]); } } */ - +#if 0 __global__ void module_potentialDerivatives_totalGradient_8_SOA_GPU_cur(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int shalos, int nhalos) { //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins"); // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // - struct point grad, clumpgrad, image_point; + struct point grad, image_point; // grad.x = 0; grad.y = 0; // int col = blockIdx.x*blockDim.x + threadIdx.x; int row = blockIdx.y*blockDim.y + threadIdx.y; // if ((row < nbgridcells) && (col < nbgridcells)) { // int index = row*nbgridcells + col; // //grid_grad_x[index] = 0.; //grid_grad_y[index] = 0.; // type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1); type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1); // image_point.x = frame->xmin + col*dx; image_point.y = frame->ymin + row*dy; // for(int i = shalos; i < shalos + nhalos; i++) { - struct point true_coord, true_coord_rot; //, result; //type_t R, angular_deviation; complex zis; // // positionning at the potential center // Change the origin of the coordinate system to the center of the clump // //@@if ((row == Ny) && (col == Nx)) printf("image_x = %f, %f image_y = %f, %f\n", image_point.x, frame->xmin, image_point.y,frame->ymin); type_t true_coord_x = image_point.x - __ldg(&lens->position_x[i]); type_t true_coord_y = image_point.y - __ldg(&lens->position_y[i]); //if ((row == Ny) && (col == Nx)) printf("x = %f y = %f\n", true_coord_x, true_coord_y); // type_t cosi = __ldg(&lens->anglecos[i]); type_t sinu = __ldg(&lens->anglesin[i]); // type_t x = true_coord_x*cosi + true_coord_y*sinu; type_t y = true_coord_y*cosi - true_coord_x*sinu; // //if ((row == Ny) && (col == Nx)) printf("x = %f y = %f\n", x, y); // type_t eps = __ldg(&lens->ellipticity_potential[i]); // type_t sqe = sqrt(eps); // type_t rem2 = x*x/((1. + eps)*(1. + eps)) + y*y/((1. - eps)*(1. - eps)); // complex zci; complex znum, zden, zres; type_t norm; // zci.re = 0; zci.im = -0.5*(1. - eps*eps)/sqe; //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zci.re, zci.im); // type_t rc = __ldg(&lens->rcore[i]); type_t cx1 = (1. - eps)/(1. + eps); znum.re = cx1*x; znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; // zden.re = x; zden.im = 2.*rc*sqe - y; norm = (zden.re*zden.re + zden.im*zden.im); // zis = znum/zden //@@if ((col == Nx) && (row == Ny)) printf("norm = %f\n", norm); // zis.re = (znum.re*zden.re + znum.im*zden.im)/norm; zis.im = (znum.im*zden.re - znum.re*zden.im)/norm; // //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zis.re, zis.im); // norm = zis.re; // zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) zis.im = atan2(zis.im, norm); // //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zis.re, zis.im); // zres.re = zci.re*zis.re - zci.im*zis.im; // Re( zci*ln(zis) ) zres.im = zci.im*zis.re + zis.im*zci.re; // Im( zci*ln(zis) ) // //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zres: %f %f\n", row, col, zres.re, zres.im); // type_t b0 = __ldg(&lens->b0[i]); grad.x += b0*(zres.re*cosi - zres.im*sinu); grad.y += b0*(zres.im*cosi + zres.re*sinu); //@@if ((col == Nx) && (row == Ny)) printf("grad: %f %f\n", grad.x, grad.y); } //IACA_END; // grid_grad_x[index] = grad.x; grid_grad_y[index] = grad.y; //if ((row == 0) && (col == 9)) //printf("%f %f: %f %f\n", image_point.x, image_point.y, grid_grad_x[index], grid_grad_y[index]); } } __global__ void module_potentialDerivatives_totalGradient_8_SOA_GPU_SM2(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int shalos, int nhalos) { // //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins"); // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // type_t grad_x, grad_y; type_t clumpgrad_x, clumpgrad_y; type_t image_point_x, image_point_y; // SHARED type_t cosi [200]; SHARED type_t sinu [200]; SHARED type_t rc [200]; SHARED type_t b0 [200]; SHARED type_t epsi [200]; SHARED type_t position_x[200]; SHARED type_t position_y[200]; SHARED type_t rsqe [200]; SHARED type_t sonepeps [200]; SHARED type_t sonemeps [200]; // grad_x = 0; grad_y = 0; // int col = blockIdx.x*blockDim.x + threadIdx.x; int row = blockIdx.y*blockDim.y + threadIdx.y; int ithread = threadIdx.y*blockDim.x + threadIdx.x; // int index = row*nbgridcells + col; // //grid_grad_x[index] = 0.; //grid_grad_y[index] = 0.; // type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1); type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1); // image_point_x = frame->xmin + col*dx; image_point_y = frame->ymin + row*dy; // int i = ithread; if (i < nhalos) { cosi[i] = __ldg(&lens->anglecos [shalos + i]); sinu[i] = __ldg(&lens->anglesin [shalos + i]); position_x[i] = __ldg(&lens->position_x [shalos + i]); position_y[i] = __ldg(&lens->position_y [shalos + i]); rc[i] = __ldg(&lens->rcore [shalos + i]); b0[i] = __ldg(&lens->b0 [shalos + i]); epsi[i] = __ldg(&lens->ellipticity_potential[shalos + i]); //sonemeps[i] = 1 - epsi[i]; //sonepeps[i] = 1 + epsi[i]; rsqe[i] = sqrt(epsi[i]); } __syncthreads(); // if ((row < nbgridcells) && (col < nbgridcells)) { for(int i = 0; i < nhalos; i++) { // type_t true_coord_x = image_point_x - position_x[i]; type_t true_coord_y = image_point_y - position_y[i]; // type_t x = true_coord_x*cosi[i] + true_coord_y*sinu[i]; type_t y = true_coord_y*cosi[i] - true_coord_x*sinu[i]; // type_t eps = epsi[i]; //type_t onemeps = 1 - eps; //type_t onepeps = 1 + eps; // //type_t eps = epsi[i]; type_t onemeps = sonemeps[i]; type_t onepeps = sonepeps[i]; // //type_t sqe = sqrt(eps); type_t sqe = rsqe[i]; type_t rem2 = x*x/(onepeps*onepeps) + y*y/(onemeps*onemeps); // complex zis; // type_t znum_re, znum_im; type_t zres_re, zres_im; type_t norm; type_t zden_re, zden_im; type_t zis_re, zis_im; // type_t zci_im = -0.5*(1. - eps*eps)/sqe; // type_t cx1 = onemeps/onepeps; // znum_re = cx1*x; znum_im = 2.*sqe*sqrt(rc[i]*rc[i] + rem2) - y/cx1; // zden_re = x; zden_im = 2.*rc[i]*sqe - y; // norm = (x*x + zden_im*zden_im); // zis = znum/zden zis.re = (znum_re*x + znum_im*zden_im)/norm; zis.im = (znum_im*x - znum_re*zden_im)/norm; // norm = zis.re; // zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) zis.im = atan2(zis.im, norm); // zres_re = - zci_im*zis.im; // Re( zci*ln(zis) ) zres_im = zci_im*zis.re; // Im( zci*ln(zis) ) // grad_x += b0[i]*(zres_re*cosi[i] - zres_im*sinu[i]); grad_y += b0[i]*(zres_im*cosi[i] + zres_re*sinu[i]); } // grid_grad_x[index] = grad_x; grid_grad_y[index] = grad_y; //__syncthreads(); } } // // // __global__ void module_potentialDerivatives_totalGradient_8_SOA_GPU_SM3(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int shalos, int nhalos) { // //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins"); // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // type_t grad_x, grad_y; type_t clumpgrad_x, clumpgrad_y; type_t image_point_x, image_point_y; // SHARED type_t cosi [200]; SHARED type_t sinu [200]; SHARED type_t rci [200]; SHARED type_t b0 [200]; SHARED type_t epsi [200]; SHARED type_t position_x[200]; SHARED type_t position_y[200]; SHARED type_t rsqe [200]; //SHARED type_t sgrad_x [(BLOCK_SIZE_X + 1)*BLOCK_SIZE_Y]; //SHARED type_t sgrad_y [(BLOCK_SIZE_X + 1)*BLOCK_SIZE_Y]; //SHARED type_t sonepeps [200]; //SHARED type_t sonemeps [200]; // grad_x = 0; grad_y = 0; // int row = blockIdx.y*blockDim.y + threadIdx.y; int col = blockIdx.x*blockDim.x + threadIdx.x; // //int loc_row = threadIdx.x; //int loc_col = threadIdx.y*blockDim.x + threadIdx.x; // //int grid_size = nbgridcells/blockDim.y; // //if (threadIdx.x == 0) printf("%d %d %d: row = %d, col = %d, grid_size = %d\n", blockIdx.y, gridDim.y, threadIdx.y, row, col, grid_size); // type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1); type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1); //if (threadIdx.x == 0) printf("dx = %f, dy = %f\n", dx, dy); // image_point_x = frame->xmin + col*dx; image_point_y = frame->ymin + row*dy; // //int iloc = threadIdx.x*blockDim.y + threadIdx.y; int iglob = row*nbgridcells + col; int numThreads = blockDim.x*blockDim.y; // for (int i = 0; i < (nhalos + numThreads - 1)/numThreads; ++i) { int iloc = threadIdx.y*blockDim.x + threadIdx.x + i*numThreads; if (iloc < nhalos) { cosi[iloc] = __ldg(&lens->anglecos [shalos + iloc]); sinu[iloc] = __ldg(&lens->anglesin [shalos + iloc]); position_x[iloc] = __ldg(&lens->position_x [shalos + iloc]); position_y[iloc] = __ldg(&lens->position_y [shalos + iloc]); rci[iloc] = __ldg(&lens->rcore [shalos + iloc]); b0[iloc] = __ldg(&lens->b0 [shalos + iloc]); epsi[iloc] = __ldg(&lens->ellipticity_potential[shalos + iloc]); rsqe[iloc] = sqrt(epsi[iloc]); } } __syncthreads(); // if ((row < nbgridcells) && (col < nbgridcells)) { // for(int i = 0; i < nhalos; i++) { //int index = iloc; #if 1 type_t rc = rci[i]; type_t eps = epsi[i]; type_t onemeps = 1 - eps; type_t onepeps = 1 + eps; // type_t sqe = rsqe[i]; type_t cx1 = onemeps/onepeps; // // //type_t zci_im = 1; type_t zci_im = -0.5*(1. - eps*eps)/sqe; type_t inv_onepeps = 1./(onepeps*onepeps); type_t inv_onemeps = 1./(onemeps*onemeps); #endif // { //KERNEL_8; type_t grad_x = grad_y = 0.; type_t true_coord_y = image_point_y - position_y[i]; type_t true_coord_x = image_point_x - position_x[i]; KERNEL_8_reg(0); grid_grad_x[iglob + 0] += grad_x; grid_grad_y[iglob + 0] += grad_y; } /* { //KERNEL_8; type_t grad_x = grad_y = 0.; type_t true_coord_y = image_point_y - position_y[i]; type_t true_coord_x = image_point_x - position_x[i] + BLOCK_SIZE_X/2*dx; KERNEL_8_reg(0); grid_grad_x[iglob + BLOCK_SIZE_X/2] += grad_x; grid_grad_y[iglob + BLOCK_SIZE_X/2] += grad_y; } */ // } } } // // // __global__ void module_potentialDerivatives_totalGradient_8_SOA_GPU_SM4(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA lens, const struct grid_param *frame, int nbgridcells, int shalos, int nhalos/*, type_t* dtimer*/) { // //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins"); // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // - type_t grad_x, grad_y; - type_t clumpgrad_x, clumpgrad_y; - type_t image_point_x, image_point_y; + //type_t grad_x, grad_y; + //type_t clumpgrad_x, clumpgrad_y; + //type_t image_point_x, image_point_y; // // - type_t* cosi = &shared[0*nhalos]; - type_t* sinu = &shared[1*nhalos]; - type_t* rc = &shared[2*nhalos]; - type_t* b0 = &shared[3*nhalos]; - type_t* epsi = &shared[4*nhalos]; - type_t* position_x = &shared[5*nhalos]; - type_t* position_y = &shared[6*nhalos]; - type_t* rsqe = &shared[7*nhalos]; + //type_t* cosi = &shared[0*nhalos]; + //type_t* sinu = &shared[1*nhalos]; + //type_t* rc = &shared[2*nhalos]; + //type_t* b0 = &shared[3*nhalos]; + //type_t* epsi = &shared[4*nhalos]; + //type_t* position_x = &shared[5*nhalos]; + //type_t* position_y = &shared[6*nhalos]; + //type_t* rsqe = &shared[7*nhalos]; //SHARED type_t sonepeps [200]; //SHARED type_t sonemeps [200]; // grad_x = 0; grad_y = 0; // int grid_size = nbgridcells/gridDim.y; int row = blockIdx.x*blockDim.x + threadIdx.x; int col = (blockIdx.y*blockDim.y + threadIdx.y)/**grid_size*/; // // #if 0 //SHARED double sgrad_x [ if (/*(threadIdx.x == 0) &&*/ (blockIdx.x == 0)) { if (threadIdx.x == 0) printf("blockDim.x = %d, blockIdx.x = %d grimdDim.x = %d threadIdx.x = %d\n", blockDim.x, blockIdx.x, gridDim.x, threadIdx.x); if (threadIdx.x == 0) printf("blockDim.y = %d, blockIdx.y = %d grimdDim.y = %d threadIdx.y = %d\n", blockDim.y, blockIdx.y, gridDim.y, threadIdx.y); if (threadIdx.x == 0) printf("row = %d, col = %d, grid_size = %d\n", row, col, grid_size); } __syncthreads(); #endif - type_t* sgrad_x = &shared[8*nhalos]; + //type_t* sgrad_x = &shared[8*nhalos]; type_t* sgrad_y = &shared[8*nhalos + (grid_size + 1)*blockDim.x]; // // //grid_grad_x[index] = 0.; //grid_grad_y[index] = 0.; // type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1); type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1); // //if (threadIdx.x == 0) printf("dx = %f, dy = %f\n", dx, dy); // image_point_x = frame->xmin + col*dx; image_point_y = frame->ymin + row*dy; return; // - int i = 0; + //int i = 0; #if 0 for (; i < nhalos; i = i + blockDim.x) { int pos = threadIdx.x + i; /*if ((threadIdx.x == 0) && (blockIdx.x == 0))*/ printf("pos = %d\n"); __syncthreads(); // cosi[pos] = __ldg(&lens->anglecos [shalos + pos]); sinu[pos] = __ldg(&lens->anglesin [shalos + pos]); position_x[pos] = __ldg(&lens->position_x [shalos + pos]); position_y[pos] = __ldg(&lens->position_y [shalos + pos]); rc[pos] = __ldg(&lens->rcore [shalos + pos]); b0[pos] = __ldg(&lens->b0 [shalos + pos]); epsi[pos] = __ldg(&lens->ellipticity_potential[shalos + pos]); rsqe[pos] = sqrt(epsi[i]); // } #endif #if 0 if (threadIdx.x == 0) for (; i < nhalos; i += 1) { cosi[i] = __ldg(&lens->anglecos [shalos + i]); sinu[i] = __ldg(&lens->anglesin [shalos + i]); position_x[i] = __ldg(&lens->position_x [shalos + i]); position_y[i] = __ldg(&lens->position_y [shalos + i]); rc[i] = __ldg(&lens->rcore [shalos + i]); b0[i] = __ldg(&lens->b0 [shalos + i]); epsi[i] = __ldg(&lens->ellipticity_potential[shalos + i]); rsqe[i] = sqrt(epsi[i]); } #endif __syncthreads(); //if ((row == col == 0)) printf("shared mem done...\n"); // if (row < nbgridcells) { //for(int icol = 0; icol < grid_size; ++icol){ // if (col + icol < nbgridcells){ //grad_x = grad_y = 0.; int index = row*nbgridcells + col; // for(int i = 0; i < nhalos; i++) { int sindex = threadIdx.x*grid_size; #if 0 type_t eps = epsi[i]; type_t onemeps = 1 - eps; type_t onepeps = 1 + eps; // type_t sqe = rsqe[i]; type_t cx1 = onemeps/onepeps; // // //type_t x = true_coord_y*sinu[i]; //type_t y = true_coord_y*cosi[i]; type_t true_coord_y = image_point_y - position_y[i]; type_t true_coord_x = image_point_x - position_x[i] /*+ icol*dx*/; // complex zci; zci.im = -0.5*(1. - eps*eps)/sqe; type_t inv_onepeps = 1./(onepeps*onepeps); type_t inv_onemeps = 1./(onemeps*onemeps); #endif // for(int icol = 0; icol < grid_size; ++icol) { if (col + icol < nbgridcells) { #if 0 if ((row == 1) && (col == 1)) printf("%d %d: %f %f\n", row, col, true_coord_x, true_coord_y); true_coord_x = image_point_x - position_x[i] + icol*dx; // //x += true_coord_x*cosi[i]; //y -= true_coord_x*sinu[i]; type_t x = true_coord_x*cosi[i] + true_coord_y*sinu[i]; type_t y = true_coord_y*cosi[i] - true_coord_x*sinu[i]; // //if ((row == 1) && (col == 0)) printf("i = %d, eps = %f\n", i, eps); // //double eps = epsi[i]; //double onemeps = sonemeps[i]; //double onepeps = sonepeps[i]; // //double sqe = sqrt(eps); //double rem2 = x*x/(onepeps*onepeps) + y*y/(onemeps*onemeps); type_t rem2 = x*x*inv_onepeps + y*y*inv_onemeps; // // //double znum_re, znum_im; //double zres_re, zres_im; //double zden_re, zden_im; //double zis_re, zis_im; type_t norm; // complex zis; complex znum; complex zden; complex zres; // //double cx1 = onemeps/onepeps; // znum.re = cx1*x; znum.im = 2.*sqe*sqrt(rc[i]*rc[i] + rem2) - y/cx1; // zden.re = x; zden.im = 2.*rc[i]*sqe - y; // norm = (x*x + zden.im*zden.im); // zis = znum/zden zis.re = (znum.re*x + znum.im*zden.im)/norm; zis.im = (znum.im*x - znum.re*zden.im)/norm; // norm = zis.re; // zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) zis.im = atan2(zis.im, norm); // zres.re = - zci.im*zis.im; // Re( zci*ln(zis) ) zres.im = zci.im*zis.re; // Im( zci*ln(zis) ) // grid_grad_x[index] += b0[i]*(zres.re*cosi[i] - zres.im*sinu[i]); grid_grad_y[index] += b0[i]*(zres.im*cosi[i] + zres.re*sinu[i]); #endif //sgrad_x[sindex] += (float) sindex; sgrad_y[sindex] += (float) -sindex; //sindex++; // //grid_grad_x[index] += grad_x; //grid_grad_y[index] += grad_y; } // } } __syncthreads(); return; // #if 0 int sindex = threadIdx.x*grid_size; for(int icol = 0; icol < grid_size; ++icol) { if (col + icol < nbgridcells) { grid_grad_x[index + col] = sgrad_x[sindex]; grid_grad_y[index + col] = sgrad_y[sindex]; sindex++; } } __syncthreads(); #endif } } __global__ void module_potentialDerivatives_totalGradient_8_SOA_GPU_v2(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int i, int nhalos) { //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins"); // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // - struct point grad, clumpgrad, image_point; + struct point grad, image_point; grad.x = 0; grad.y = 0; // int row = blockIdx.y * blockDim.y + threadIdx.y; int col = blockIdx.x * blockDim.x + threadIdx.x; // if ((row < nbgridcells) && (col < nbgridcells)) { // int index = col*nbgridcells + row; // //grid_grad_x[index] = 0.; //grid_grad_y[index] = 0.; // type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1); type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1); // #if 0 /*SHARED*/ type_t img_pt[2]; if ((row == 0) && (col == 0)) { img_pt[0] = frame->xmin + col*dx; img_pt[1] = frame->ymin + row*dy; } __syncthreads(); #else image_point.x = frame->xmin + col*dx; image_point.y = frame->ymin + row*dy; #endif // // //for(int i = shalos; i < shalos + nhalos; i++) //{ //IACA_START; // - struct point true_coord, true_coord_rot; //, result; + struct point true_coord; //, result; //type_t R, angular_deviation; - complex zis; + //complex zis; // //result.x = result.y = 0.; // #if 0 true_coord.x = img_pt[0] - __ldg(&lens->position_x[i]); true_coord.y = img_pt[1] - __ldg(&lens->position_y[i]); #else true_coord.x = image_point.x - __ldg(&lens->position_x[i]); true_coord.y = image_point.y - __ldg(&lens->position_y[i]); #endif type_t cosi = __ldg(&lens->anglecos[i]); type_t sinu = __ldg(&lens->anglesin[i]); // positionning at the potential center // Change the origin of the coordinate system to the center of the clump type_t x = true_coord.x*cosi + true_coord.y*sinu; type_t y = true_coord.y*cosi - true_coord.x*sinu; // type_t eps = __ldg(&lens->ellipticity_potential[i]); // type_t sqe = sqrt(eps); // type_t rem2 = x*x/((1. + eps)*(1. + eps)) + y*y/((1. - eps)*(1. - eps)); // - complex zci; + //complex zci; complex znum, zden, zres; type_t norm; // zci.im = -0.5*(1. - eps*eps)/sqe; // type_t rc = __ldg(&lens->rcore[i]); type_t cx1 = (1. - eps)/(1. + eps); znum.re = cx1*x; znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; // zden.re = x; zden.im = 2.*rc*sqe - y; norm = (zden.re*zden.re + zden.im*zden.im); // zis = znum/zden // zis.re = (znum.re*zden.re + znum.im*zden.im)/norm; zis.im = (znum.im*zden.re - znum.re*zden.im)/norm; // type_t b0 = __ldg(&lens->b0[i]); - grad.x += b0*(zres.re*cosi - zres.im*sinu); - grad.y += b0*(zres.im*cosi + zres.re*sinu); + //grad.x += b0*(zres.re*cosi - zres.im*sinu); + //grad.y += b0*(zres.im*cosi + zres.re*sinu); // grid_grad_x[index] += grad.x; grid_grad_y[index] += grad.y; } } - +#endif __device__ point module_potentialDerivatives_totalGradient_5_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos){ //asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins"); //printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n"); // struct point grad, result; grad.x = 0; grad.y = 0; for(int i = shalos; i < shalos + nhalos; i++) { // - struct point true_coord, true_coord_rotation; + struct point true_coord; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // //true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; // type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t ell_pot = lens->ellipticity_potential[i]; // type_t b0_inv_R = lens->b0[i]/sqrt(x*x*(1 - ell_pot) + y*y*(1 + ell_pot)); // result.x = (1 - ell_pot)*x*b0_inv_R; result.y = (1 + ell_pot)*y*b0_inv_R; // grad.x += result.x*cose - result.y*sine; grad.y += result.y*cose + result.x*sine; } return grad; } __device__ point module_potentialDerivatives_totalGradient_8_SOA_GPU(const struct point *image_point, const struct Potential_SOA *lens, int shalos, int nhalos){ struct point grad; grad.x = 0; grad.y = 0; // for(int i = shalos; i < shalos + nhalos; i++) { complex zis; // positioning at the potential center // Change the origin of the coordinate system to the center of the clump //@@if ((row == Ny) && (col == Nx)) printf("image_x = %f, %f image_y = %f, %f\n", image_point.x, frame->xmin, image_point.y,frame->ymin); type_t true_coord_x = image_point->x - __ldg(&lens->position_x[i]); type_t true_coord_y = image_point->y - __ldg(&lens->position_y[i]); //if ((row == Ny) && (col == Nx)) printf("x = %f y = %f\n", true_coord_x, true_coord_y); // type_t cosi = __ldg(&lens->anglecos[i]); type_t sinu = __ldg(&lens->anglesin[i]); // type_t x = true_coord_x*cosi + true_coord_y*sinu; type_t y = true_coord_y*cosi - true_coord_x*sinu; // //if ((row == Ny) && (col == Nx)) printf("x = %f y = %f\n", x, y); // type_t eps = __ldg(&lens->ellipticity_potential[i]); // type_t sqe = sqrt(eps); // type_t rem2 = x*x/((1. + eps)*(1. + eps)) + y*y/((1. - eps)*(1. - eps)); // complex zci; complex znum, zden, zres; type_t norm; // zci.re = 0; zci.im = -0.5*(1. - eps*eps)/sqe; //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zci.re, zci.im); // type_t rc = __ldg(&lens->rcore[i]); type_t cx1 = (1. - eps)/(1. + eps); znum.re = cx1*x; znum.im = 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; // zden.re = x; zden.im = 2.*rc*sqe - y; norm = (zden.re*zden.re + zden.im*zden.im); // zis = znum/zden //@@if ((col == Nx) && (row == Ny)) printf("norm = %f\n", norm); // zis.re = (znum.re*zden.re + znum.im*zden.im)/norm; zis.im = (znum.im*zden.re - znum.re*zden.im)/norm; // //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zis.re, zis.im); // norm = zis.re; // zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) zis.im = atan2(zis.im, norm); // //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zis: %f %f\n", row, col, zis.re, zis.im); // zres.re = zci.re*zis.re - zci.im*zis.im; // Re( zci*ln(zis) ) zres.im = zci.im*zis.re + zis.im*zci.re; // Im( zci*ln(zis) ) // //@@if ((col == Nx) && (row == Ny)) printf("%d %d, zres: %f %f\n", row, col, zres.re, zres.im); // type_t b0 = __ldg(&lens->b0[i]); grad.x += b0*(zres.re*cosi - zres.im*sinu); grad.y += b0*(zres.im*cosi + zres.re*sinu); //@@if ((col == Nx) && (row == Ny)) printf("grad: %f %f\n", grad.x, grad.y); } //IACA_END; // return(grad); } __device__ point module_potentialDerivatives_totalGradient_81_SOA_GPU(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos){ //asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins"); //std::cout << "# module_potentialDerivatives_totalGradient_81_SOA begins" << std::endl; // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // - struct point grad, clumpgrad; + struct point grad; grad.x = 0; grad.y = 0; for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // - struct point true_coord, true_coord_rot; //, result; + struct point true_coord; //, result; //type_t R, angular_deviation; complex zis; // //result.x = result.y = 0.; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; /*positionning at the potential center*/ // Change the origin of the coordinate system to the center of the clump type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; type_t rcut = lens->rcut[i]; type_t b0 = lens->b0[i]; type_t t05 = b0*rcut/(rcut - rc); // type_t sqe = sqrt(eps); // type_t cx1 = (1. - eps)/(1. + eps); type_t cxro = (1. + eps)*(1. + eps); type_t cyro = (1. - eps)*(1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres_rc, zres_rcut; type_t norm; // zci.re = 0; zci.im = -0.5*(1. - eps*eps)/sqe; // // step 1 { KERNEL(rc, zres_rc) } // step 2 { KERNEL(rcut, zres_rcut) } zis.re = t05*(zres_rc.re - zres_rcut.re); zis.im = t05*(zres_rc.im - zres_rcut.im); // rotation grad.x += (zis.re*cose - zis.im*sine); grad.y += (zis.im*cose + zis.re*sine); // } // return(grad); } #if 1 typedef struct point (*halo_func_GPU_t) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos); __constant__ halo_func_GPU_t halo_func_GPU[100] = { 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_5_SOA_GPU, 0, 0, module_potentialDerivatives_totalGradient_8_SOA_GPU, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_81_SOA_GPU, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #endif __global__ void module_potentialDerivatives_totalGradient_SOA_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart) { struct point grad, clumpgrad, image_point; // int col = blockIdx.x*blockDim.x + threadIdx.x; int row = blockIdx.y*blockDim.y + threadIdx.y; // if ((row + jstart < nbgridcells_y) && (col + istart < nbgridcells_x)) { // //double dx = (frame->xmax - frame->xmin)/(nbgridcells-1); //double dy = (frame->ymax - frame->ymin)/(nbgridcells-1); // int index = row*nbgridcells_x + col; // Create temp grad variable to minimise writing to global memory grid_grad grad.x = 0.; grad.y = 0.; // image_point.x = frame->xmin + (col + istart)*dx; image_point.y = frame->ymin + (row + jstart)*dy; // int shalos = 0; while (shalos < nhalos) { int lens_type = lens->type[shalos]; int count = 1; while (lens->type[shalos + count] == lens_type) count++; // //if(row == 0 && col == 0) printf("type = %d, count %d , shalos %d \n", lens_type,count,shalos ); // clumpgrad = (*halo_func_GPU[lens_type])(&image_point, lens, shalos, count); // grad.x += clumpgrad.x; grad.y += clumpgrad.y; shalos += count; } // Write to global memory grid_grad_x[index] = grad.x; grid_grad_y[index] = grad.y; //if ((row == 0) && (col == 9)) //printf("%f %f: %f %f\n", image_point.x, image_point.y, grid_grad_x[index], grid_grad_y[index]); } } __global__ void module_potentialDerivatives_totalGradient_SOA_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int nhalos) { struct point grad, clumpgrad, image_point; // int col = blockIdx.x*blockDim.x + threadIdx.x; int row = blockIdx.y*blockDim.y + threadIdx.y; // if ((row < nbgridcells) && (col < nbgridcells)) { // type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1); type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1); // int index = row*nbgridcells + col; // Create temp grad variable to minimise writing to global memory grid_grad grad.x = 0; grad.y = 0; // image_point.x = frame->xmin + col*dx; image_point.y = frame->ymin + row*dy; // int shalos = 0; while (shalos < nhalos) { int lens_type = lens->type[shalos]; int count = 1; while (lens->type[shalos + count] == lens_type) count++; // //if(row == 0 && col == 0) printf("type = %d, count %d , shalos %d \n", lens_type,count,shalos ); // clumpgrad = (*halo_func_GPU[lens_type])(&image_point, lens, shalos, count); // grad.x += clumpgrad.x; grad.y += clumpgrad.y; shalos += count; } // Write to global memory grid_grad_x[index] = grad.x; grid_grad_y[index] = grad.y; //if ((row == 0) && (col == 9)) //printf("%f %f: %f %f\n", image_point.x, image_point.y, grid_grad_x[index], grid_grad_y[index]); } } diff --git a/src/grid_gradient2_GPU.cu b/src/grid_gradient2_GPU.cu index 72aff04..13540d0 100644 --- a/src/grid_gradient2_GPU.cu +++ b/src/grid_gradient2_GPU.cu @@ -1,224 +1,220 @@ // // // #include #include "grid_gradient2_GPU.cuh" #include "gradient2_GPU.cuh" #include #define BLOCK_SIZE_X 32 #define BLOCK_SIZE_Y 16 //#define ROT #define _SHARED_MEM #ifdef _SHARED_MEM #define SHARED __shared__ #warning "shared memory" extern __shared__ type_t shared[]; #else #define SHARED #endif #define Nx 1 #define Ny 0 #define cudasafe extern "C" { type_t myseconds(); } __global__ void module_potentialDerivatives_totalGradient2_SOA_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int nhalos); //// void module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(type_t *grid_grad2_a, type_t *grid_grad_b, type_t *grid_grad2_c, type_t *grid_grad_d, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos); // void gradient2_grid_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart); // //void //module_potentialDerivatives_totalGradient_SOA_CPU_GPU_v2(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_cpu, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos); // // // void gradient2_grid_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos ,int nbgridcells) { type_t dx = (frame->xmax - frame->xmin)/(nbgridcells - 1); type_t dy = (frame->ymax - frame->ymin)/(nbgridcells - 1); // gradient2_grid_GPU(grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d, frame, lens, nhalos, dx, dy, nbgridcells, nbgridcells, 0, 0); } // // // void gradient2_grid_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart) { int nBlocks_gpu = 0; // Define the number of threads per block the GPU will use cudaDeviceProp properties_gpu; cudaGetDeviceProperties(&properties_gpu, 0); // Get properties of 0th GPU in use if (properties_gpu.maxThreadsDim[0]type = (int *) malloc(sizeof(int)); // Allocate variables on the GPU cudasafe(cudaMalloc( (void**)&(lens_kernel), sizeof(Potential_SOA)),"Gradient2gpu.cu : Alloc Potential_SOA: " ); cudasafe(cudaMalloc( (void**)&(type_gpu), nhalos*sizeof(int)),"Gradient2gpu.cu : Alloc type_gpu: " ); cudasafe(cudaMalloc( (void**)&(lens_x_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc x_gpu: " ); cudasafe(cudaMalloc( (void**)&(lens_y_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc y_gpu: " ); cudasafe(cudaMalloc( (void**)&(b0_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc b0_gpu: " ); cudasafe(cudaMalloc( (void**)&(angle_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc angle_gpu: " ); cudasafe(cudaMalloc( (void**)&(epot_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc epot_gpu: " ); cudasafe(cudaMalloc( (void**)&(rcore_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc rcore_gpu: " ); cudasafe(cudaMalloc( (void**)&(rcut_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc rcut_gpu: " ); cudasafe(cudaMalloc( (void**)&(anglecos_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc anglecos_gpu: " ); cudasafe(cudaMalloc( (void**)&(anglesin_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc anglesin_gpu: " ); cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradient2gpu.cu : Alloc frame_gpu: " ); cudasafe(cudaMalloc( (void**)&(grid_grad2_a_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_a_gpu: " ); cudasafe(cudaMalloc( (void**)&(grid_grad2_b_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_b_gpu: " ); cudasafe(cudaMalloc( (void**)&(grid_grad2_c_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_c_gpu: " ); cudasafe(cudaMalloc( (void**)&(grid_grad2_d_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_d_gpu: " ); // Copy values to the GPU // cudasafe(cudaMemcpy(type_gpu,lens->type , nhalos*sizeof(int),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy type_gpu: " ); cudasafe(cudaMemcpy(lens_x_gpu,lens->position_x , nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy x_gpu: " ); cudasafe(cudaMemcpy(lens_y_gpu,lens->position_y , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy y_gpu: " ); cudasafe(cudaMemcpy(b0_gpu,lens->b0 , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2pu.cu : Copy b0_gpu: " ); cudasafe(cudaMemcpy(angle_gpu,lens->ellipticity_angle , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy angle_gpu: " ); cudasafe(cudaMemcpy(epot_gpu, lens->ellipticity_potential, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy epot_gpu: " ); cudasafe(cudaMemcpy(rcore_gpu, lens->rcore, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy rcore_gpu: " ); cudasafe(cudaMemcpy(rcut_gpu, lens->rcut, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy rcut_gpu: " ); cudasafe(cudaMemcpy(anglecos_gpu, lens->anglecos, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy anglecos: " ); cudasafe(cudaMemcpy(anglesin_gpu, lens->anglesin, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy anglesin: " ); cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy fame_gpu: " ); // lens_gpu->type = type_gpu; lens_gpu->position_x = lens_x_gpu; lens_gpu->position_y = lens_y_gpu; lens_gpu->b0 = b0_gpu; lens_gpu->ellipticity_angle = angle_gpu; lens_gpu->ellipticity_potential = epot_gpu; lens_gpu->rcore = rcore_gpu; lens_gpu->rcut = rcut_gpu; lens_gpu->anglecos = anglecos_gpu; lens_gpu->anglesin = anglesin_gpu; // cudaMemcpy(lens_kernel, lens_gpu, sizeof(Potential_SOA), cudaMemcpyHostToDevice); // type_t time = -myseconds(); //module_potentialDerivatives_totalGradient_SOA_CPU_GPU(grid_grad_x_gpu, grid_grad_y_gpu, frame_gpu, lens_kernel, nbgridcells_x, nhalos); module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(grid_grad2_a_gpu, grid_grad2_b_gpu, grid_grad2_c_gpu, grid_grad2_d_gpu, frame_gpu, lens_kernel, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart); // //cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU"); cudaDeviceSynchronize(); time += myseconds(); //std::cout << " kernel time = " << time << " s." << std::endl; // cudasafe(cudaMemcpy( grid_grad2_a, grid_grad2_a_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost )," --- Gradient2gpu.cu : Copy source_a_gpu: " ); cudasafe(cudaMemcpy( grid_grad2_b, grid_grad2_b_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost)," --- Gradient2gpu.cu : Copy source_b_gpu: " ); cudasafe(cudaMemcpy( grid_grad2_c, grid_grad2_c_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost )," --- Gradient2gpu.cu : Copy source_c_gpu: " ); cudasafe(cudaMemcpy( grid_grad2_d, grid_grad2_d_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost)," --- Gradient2gpu.cu : Copy source_d_gpu: " ); // //printf("-----> %f %f \n",grid_grad_x[Nx], grid_grad_y[Ny]); // Free GPU memory cudaFree(lens_gpu); cudaFree(type_gpu); cudaFree(lens_x_gpu); cudaFree(lens_y_gpu); cudaFree(b0_gpu); cudaFree(angle_gpu); cudaFree(epot_gpu); cudaFree(rcore_gpu); cudaFree(rcut_gpu); cudaFree(anglecos_gpu); cudaFree(anglesin_gpu); cudaFree(grid_grad2_a_gpu); cudaFree(grid_grad2_b_gpu); cudaFree(grid_grad2_c_gpu); cudaFree(grid_grad2_d_gpu); } void module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart) { int GRID_SIZE_X = (nbgridcells_x + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks int GRID_SIZE_Y = (nbgridcells_y + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y; // printf("grid_size_x = %d, grid_size_y = %d, nbgridcells_x = %d, nbgridcells_y = %d, istart = %d, jstart = %d (split)\n", GRID_SIZE_X, GRID_SIZE_Y, nbgridcells_x, nbgridcells_y, istart, jstart); // dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1); dim3 grid (GRID_SIZE_X , GRID_SIZE_Y); - // - int count = nhalos; //printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (double) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(double)); printf("nhalos = %d, size of shared memory = %lf (split)\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t)); // cudaMemset(grid_grad2_a, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t)); cudaMemset(grid_grad2_b, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t)); cudaMemset(grid_grad2_c, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t)); cudaMemset(grid_grad2_d, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t)); // //module_potentialDerivatives_totalGradient_SOA_GPU<<>> (grid_grad_x, grid_grad_y, lens, frame, nhalos, nbgridcells_x); module_potentialDerivatives_totalGradient2_SOA_GPU<<>> (grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d, lens_gpu, frame, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart); cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU"); // cudaDeviceSynchronize(); printf("GPU kernel done...\n"); } // // // void module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos) { // int GRID_SIZE_X = (nbgridcells + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks int GRID_SIZE_Y = (nbgridcells + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y; // type_t* timer = (type_t *) malloc((int) nbgridcells*nbgridcells*sizeof(type_t)); - type_t* dtimer; // dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1); dim3 grid (GRID_SIZE_X , GRID_SIZE_Y); // - int count = nhalos; //printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(type_t)); printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t)); // cudaMemset(grid_grad2_a, 0, nbgridcells*nbgridcells*sizeof(type_t)); cudaMemset(grid_grad2_b, 0, nbgridcells*nbgridcells*sizeof(type_t)); cudaMemset(grid_grad2_c, 0, nbgridcells*nbgridcells*sizeof(type_t)); cudaMemset(grid_grad2_d, 0, nbgridcells*nbgridcells*sizeof(type_t)); // //module_potentialDerivatives_totalGradient2_SOA_GPU<<>> (grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d, lens_gpu, frame, nbgridcells, nhalos); cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU"); // cudaDeviceSynchronize(); printf("GPU kernel done...\n"); } diff --git a/src/grid_gradient_GPU.cu b/src/grid_gradient_GPU.cu index b25f4d3..d5b8db3 100644 --- a/src/grid_gradient_GPU.cu +++ b/src/grid_gradient_GPU.cu @@ -1,358 +1,343 @@ // // // #include #include "grid_gradient_GPU.cuh" #include "gradient_GPU.cuh" #define BLOCK_SIZE_X 32 #define BLOCK_SIZE_Y 16 //#define ROT #define _SHARED_MEM #ifdef _SHARED_MEM #define SHARED __shared__ #warning "shared memory" extern __shared__ type_t shared[]; #else #define SHARED #endif #define Nx 1 #define Ny 0 #define cudasafe extern "C" { type_t myseconds(); } __global__ void module_potentialDerivatives_totalGradient_SOA_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int nhalos); // void module_potentialDerivatives_totalGradient_SOA_CPU_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart); // void module_potentialDerivatives_totalGradient_SOA_CPU_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos); // void gradient_grid_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart); // //void //module_potentialDerivatives_totalGradient_SOA_CPU_GPU_v2(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_cpu, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos); // // // void gradient_grid_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos ,int nbgridcells) { type_t dx = (frame->xmax - frame->xmin)/(nbgridcells - 1); type_t dy = (frame->ymax - frame->ymin)/(nbgridcells - 1); // gradient_grid_GPU(grid_grad_x, grid_grad_y, frame, lens, nhalos, dx, dy, nbgridcells, nbgridcells, 0, 0); } // // // void gradient_grid_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart) { int nBlocks_gpu = 0; // Define the number of threads per block the GPU will use cudaDeviceProp properties_gpu; cudaGetDeviceProperties(&properties_gpu, 0); // Get properties of 0th GPU in use if (properties_gpu.maxThreadsDim[0]type = (int *) malloc(sizeof(int)); // Allocate variables on the GPU cudasafe(cudaMalloc( (void**)&(lens_kernel), sizeof(Potential_SOA)),"Gradientgpu.cu : Alloc Potential_SOA: " ); cudasafe(cudaMalloc( (void**)&(type_gpu), nhalos*sizeof(int)),"Gradientgpu.cu : Alloc type_gpu: " ); cudasafe(cudaMalloc( (void**)&(lens_x_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc x_gpu: " ); cudasafe(cudaMalloc( (void**)&(lens_y_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc y_gpu: " ); cudasafe(cudaMalloc( (void**)&(b0_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc b0_gpu: " ); cudasafe(cudaMalloc( (void**)&(angle_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc angle_gpu: " ); cudasafe(cudaMalloc( (void**)&(epot_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc epot_gpu: " ); cudasafe(cudaMalloc( (void**)&(rcore_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc rcore_gpu: " ); cudasafe(cudaMalloc( (void**)&(rcut_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc rcut_gpu: " ); cudasafe(cudaMalloc( (void**)&(anglecos_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc anglecos_gpu: " ); cudasafe(cudaMalloc( (void**)&(anglesin_gpu), nhalos*sizeof(type_t)),"Gradientgpu.cu : Alloc anglesin_gpu: " ); cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradientgpu.cu : Alloc frame_gpu: " ); cudasafe(cudaMalloc( (void**)&(grid_grad_x_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradientgpu.cu : Alloc source_x_gpu: " ); cudasafe(cudaMalloc( (void**)&(grid_grad_y_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradientgpu.cu : Alloc source_y_gpu: " ); // Copy values to the GPU // cudasafe(cudaMemcpy(type_gpu,lens->type , nhalos*sizeof(int),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy type_gpu: " ); cudasafe(cudaMemcpy(lens_x_gpu,lens->position_x , nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy x_gpu: " ); cudasafe(cudaMemcpy(lens_y_gpu,lens->position_y , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy y_gpu: " ); cudasafe(cudaMemcpy(b0_gpu,lens->b0 , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy b0_gpu: " ); cudasafe(cudaMemcpy(angle_gpu,lens->ellipticity_angle , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy angle_gpu: " ); cudasafe(cudaMemcpy(epot_gpu, lens->ellipticity_potential, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy epot_gpu: " ); cudasafe(cudaMemcpy(rcore_gpu, lens->rcore, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy rcore_gpu: " ); cudasafe(cudaMemcpy(rcut_gpu, lens->rcut, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy rcut_gpu: " ); cudasafe(cudaMemcpy(anglecos_gpu, lens->anglecos, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradientgpu.cu : Copy anglecos: " ); cudasafe(cudaMemcpy(anglesin_gpu, lens->anglesin, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy anglesin: " ); cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradientgpu.cu : Copy fame_gpu: " ); // lens_gpu->type = type_gpu; lens_gpu->position_x = lens_x_gpu; lens_gpu->position_y = lens_y_gpu; lens_gpu->b0 = b0_gpu; lens_gpu->ellipticity_angle = angle_gpu; lens_gpu->ellipticity_potential = epot_gpu; lens_gpu->rcore = rcore_gpu; lens_gpu->rcut = rcut_gpu; lens_gpu->anglecos = anglecos_gpu; lens_gpu->anglesin = anglesin_gpu; // cudaMemcpy(lens_kernel, lens_gpu, sizeof(Potential_SOA), cudaMemcpyHostToDevice); // type_t time = -myseconds(); //module_potentialDerivatives_totalGradient_SOA_CPU_GPU(grid_grad_x_gpu, grid_grad_y_gpu, frame_gpu, lens_kernel, nbgridcells_x, nhalos); module_potentialDerivatives_totalGradient_SOA_CPU_GPU(grid_grad_x_gpu, grid_grad_y_gpu, frame_gpu, lens_kernel, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart); // //cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU"); cudaDeviceSynchronize(); time += myseconds(); //std::cout << " kernel time = " << time << " s." << std::endl; // cudasafe(cudaMemcpy( grid_grad_x, grid_grad_x_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost )," --- Gradientgpu.cu : Copy source_x_gpu: " ); cudasafe(cudaMemcpy( grid_grad_y, grid_grad_y_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost)," --- Gradientgpu.cu : Copy source_y_gpu: " ); // //printf("-----> %f %f \n",grid_grad_x[Nx], grid_grad_y[Ny]); // Free GPU memory cudaFree(lens_gpu); cudaFree(type_gpu); cudaFree(lens_x_gpu); cudaFree(lens_y_gpu); cudaFree(b0_gpu); cudaFree(angle_gpu); cudaFree(epot_gpu); cudaFree(rcore_gpu); cudaFree(rcut_gpu); cudaFree(anglecos_gpu); cudaFree(anglesin_gpu); cudaFree(grid_grad_x_gpu); cudaFree(grid_grad_y_gpu); } #if 0 __global__ void module_potentialDerivatives_totalGradient_8_SOA_GPU_loc(type_t *grid_grad_x, type_t *grid_grad_y, const struct Potential_SOA *lens, const struct grid_param *frame, int nbgridcells, int shalos, int nhalos) { // //asm volatile("# module_potentialDerivatives_totalGradient_SOA begins"); // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // type_t grad_x, grad_y; type_t clumpgrad_x, clumpgrad_y; type_t image_point_x, image_point_y; // SHARED type_t cosi [200]; SHARED type_t sinu [200]; SHARED type_t rci [200]; SHARED type_t b0 [200]; SHARED type_t epsi [200]; SHARED type_t position_x[200]; SHARED type_t position_y[200]; SHARED type_t rsqe [200]; //SHARED type_t sgrad_x [(BLOCK_SIZE_X + 1)*BLOCK_SIZE_Y]; //SHARED type_t sgrad_y [(BLOCK_SIZE_X + 1)*BLOCK_SIZE_Y]; //SHARED type_t sonepeps [200]; //SHARED type_t sonemeps [200]; // grad_x = 0; grad_y = 0; // int row = blockIdx.y*blockDim.y + threadIdx.y; int col = blockIdx.x*blockDim.x + threadIdx.x; // //int loc_row = threadIdx.x; //int loc_col = threadIdx.y*blockDim.x + threadIdx.x; // //int grid_size = nbgridcells/blockDim.y; // //if (threadIdx.x == 0) printf("%d %d %d: row = %d, col = %d, grid_size = %d\n", blockIdx.y, gridDim.y, threadIdx.y, row, col, grid_size); // type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1); type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1); //if (threadIdx.x == 0) printf("dx = %f, dy = %f\n", dx, dy); // image_point_x = frame->xmin + col*dx; image_point_y = frame->ymin + row*dy; // //int iloc = threadIdx.x*blockDim.y + threadIdx.y; int iglob = row*nbgridcells + col; int numThreads = blockDim.x*blockDim.y; // for (int i = 0; i < (nhalos + numThreads - 1)/numThreads; ++i) { int iloc = threadIdx.y*blockDim.x + threadIdx.x + i*numThreads; if (iloc < nhalos) { cosi[iloc] = __ldg(&lens->anglecos [shalos + iloc]); sinu[iloc] = __ldg(&lens->anglesin [shalos + iloc]); position_x[iloc] = __ldg(&lens->position_x [shalos + iloc]); position_y[iloc] = __ldg(&lens->position_y [shalos + iloc]); rci[iloc] = __ldg(&lens->rcore [shalos + iloc]); b0[iloc] = __ldg(&lens->b0 [shalos + iloc]); epsi[iloc] = __ldg(&lens->ellipticity_potential[shalos + iloc]); rsqe[iloc] = sqrt(epsi[iloc]); } } __syncthreads(); // if ((row < nbgridcells) && (col < nbgridcells)) { // for(int i = 0; i < nhalos; i++) { //int index = iloc; #if 1 type_t rc = rci[i]; type_t eps = epsi[i]; type_t onemeps = 1 - eps; type_t onepeps = 1 + eps; // type_t sqe = rsqe[i]; type_t cx1 = onemeps/onepeps; // // //type_t zci_im = 1; type_t zci_im = -0.5*(1. - eps*eps)/sqe; type_t inv_onepeps = 1./(onepeps*onepeps); type_t inv_onemeps = 1./(onemeps*onemeps); #endif // { //KERNEL_8; type_t grad_x = grad_y = 0.; type_t true_coord_y = image_point_y - position_y[i]; type_t true_coord_x = image_point_x - position_x[i]; KERNEL_8_reg(0); grid_grad_x[iglob + 0] += grad_x; grid_grad_y[iglob + 0] += grad_y; } /* { //KERNEL_8; type_t grad_x = grad_y = 0.; type_t true_coord_y = image_point_y - position_y[i]; type_t true_coord_x = image_point_x - position_x[i] + BLOCK_SIZE_X/2*dx; KERNEL_8_reg(0); grid_grad_x[iglob + BLOCK_SIZE_X/2] += grad_x; grid_grad_y[iglob + BLOCK_SIZE_X/2] += grad_y; } */ // } } } #endif void module_potentialDerivatives_totalGradient_SOA_CPU_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart) { - struct point grad, clumpgrad; - // - grad.x = clumpgrad.x = 0.; - grad.y = clumpgrad.y = 0.; - // - int shalos = 0; // int GRID_SIZE_X = (nbgridcells_x + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks int GRID_SIZE_Y = (nbgridcells_y + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y; // printf("grid_size_x = %d, grid_size_y = %d, nbgridcells_x = %d, nbgridcells_y = %d, istart = %d, jstart = %d (split)\n", GRID_SIZE_X, GRID_SIZE_Y, nbgridcells_x, nbgridcells_y, istart, jstart); // dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1); dim3 grid (GRID_SIZE_X , GRID_SIZE_Y); // - int count = nhalos; - //printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (double) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(double)) -; printf("nhalos = %d, size of shared memory = %lf (split)\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t)); // cudaMemset(grid_grad_x, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t)); cudaMemset(grid_grad_y, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t)); // //module_potentialDerivatives_totalGradient_SOA_GPU<<>> (grid_grad_x, grid_grad_y, lens, frame, nhalos, nbgridcells_x); module_potentialDerivatives_totalGradient_SOA_GPU<<>> (grid_grad_x, grid_grad_y, lens_gpu, frame, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart); cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU"); // cudaDeviceSynchronize(); printf("GPU kernel done...\n"); } // // // void module_potentialDerivatives_totalGradient_SOA_CPU_GPU(type_t *grid_grad_x, type_t *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos) { - struct point grad, clumpgrad; - // - grad.x = clumpgrad.x = 0.; - grad.y = clumpgrad.y = 0.; - // - int shalos = 0; // int GRID_SIZE_X = (nbgridcells + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks int GRID_SIZE_Y = (nbgridcells + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y; //printf("grid_size_x = %d, grid_size_y = %d\n", GRID_SIZE_X, GRID_SIZE_Y); // type_t* timer = (type_t *) malloc((int) nbgridcells*nbgridcells*sizeof(type_t)); type_t* dtimer; //cudasafe(cudaMalloc( (void**)&(dtimer), (int) nbgridcells*nbgridcells*sizeof(type_t)),"Gradientgpu.cu : totalGradient_SOA_CPU_GPU: " ); // //{ //dim3 dimBlock(BLOCK_SIZE_X/1, BLOCK_SIZE_Y); //dim3 dimGrid (GRID_SIZE_X , GRID_SIZE_Y ); dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1); dim3 grid (GRID_SIZE_X , GRID_SIZE_Y); // - int count = nhalos; + //int count = nhalos; //printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(type_t)); printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t)); // cudaMemset(grid_grad_x, 0, nbgridcells*nbgridcells*sizeof(type_t)); cudaMemset(grid_grad_y, 0, nbgridcells*nbgridcells*sizeof(type_t)); // module_potentialDerivatives_totalGradient_SOA_GPU<<>> (grid_grad_x, grid_grad_y, lens_gpu, frame, nbgridcells, nhalos); cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU"); //} cudaDeviceSynchronize(); printf("GPU kernel done...\n"); } diff --git a/src/grid_map_GPU.cu b/src/grid_map_GPU.cu index e81bbbd..52dc3df 100644 --- a/src/grid_map_GPU.cu +++ b/src/grid_map_GPU.cu @@ -1,267 +1,262 @@ // // // #include #include "grid_map_GPU.cuh" #include "gradient2_GPU.cuh" #include #define BLOCK_SIZE_X 32 #define BLOCK_SIZE_Y 16 //#define ROT #define _SHARED_MEM #ifdef _SHARED_MEM #define SHARED __shared__ #warning "shared memory" extern __shared__ type_t shared[]; #else #define SHARED #endif #define Nx 1 #define Ny 0 #define cudasafe extern "C" { type_t myseconds(); } // // //void amplif_grid_CPU_GPU(type_t *map,type_t *grid_grad2_a,type_t *grid_grad2_b,type_t *grid_grad2_c,type_t *grid_grad2_d, type_t dl0s, type_t z,int mode_amp,int nhalos,int nbgridcells_x, int nbgridcells_y); //map_gpu_function_t amplif_grid_CPU_GPU(type_t *map,type_t *grid_grad2_a,type_t *grid_grad2_b,type_t *grid_grad2_c,type_t *grid_grad2_d, type_t dl0s, type_t z,int mode_amp,int nhalos,int nbgridcells_x, int nbgridcells_y); __global__ void amplif_grid_GPU(type_t *ampli,type_t *grid_grad2_a,type_t *grid_grad2_b,type_t *grid_grad2_c,type_t *grid_grad2_d, type_t dl0s, type_t z,int nbgridcells); // void map_grid_GPU(map_gpu_function_t mapfunction, type_t *map, const struct cosmo_param *cosmo, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos ,int nbgridcells,int mode_amp, type_t z ) { type_t dx = (frame->xmax - frame->xmin)/(nbgridcells - 1); type_t dy = (frame->ymax - frame->ymin)/(nbgridcells - 1); // map_grid_GPU(mapfunction,map,cosmo, frame, lens, nhalos,mode_amp,z, dx, dy, nbgridcells, nbgridcells, 0, 0); } // void map_grid_GPU(map_gpu_function_t mapfunction, type_t *map,const struct cosmo_param *cosmo, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, int mode_amp, type_t z, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart) { int nBlocks_gpu = 0; // Define the number of threads per block the GPU will use cudaDeviceProp properties_gpu; - cudaGetDeviceProperties(&properties_gpu, 0); // Get properties of 0th GPU in use if (properties_gpu.maxThreadsDim[0]z[0], z, *cosmo); //type_t dos = module_cosmodistances_observerObject(z, *cosmo); lens_gpu = (Potential_SOA *) malloc(sizeof(Potential_SOA)); lens_gpu->type = (int *) malloc(sizeof(int)); // Allocate variables on the GPU cudasafe(cudaMalloc( (void**)&(lens_kernel), sizeof(Potential_SOA)),"Gradient2gpu.cu : Alloc Potential_SOA: " ); cudasafe(cudaMalloc( (void**)&(type_gpu), nhalos*sizeof(int)),"Gradient2gpu.cu : Alloc type_gpu: " ); cudasafe(cudaMalloc( (void**)&(lens_x_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc x_gpu: " ); cudasafe(cudaMalloc( (void**)&(lens_y_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc y_gpu: " ); cudasafe(cudaMalloc( (void**)&(b0_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc b0_gpu: " ); cudasafe(cudaMalloc( (void**)&(angle_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc angle_gpu: " ); cudasafe(cudaMalloc( (void**)&(epot_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc epot_gpu: " ); cudasafe(cudaMalloc( (void**)&(rcore_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc rcore_gpu: " ); cudasafe(cudaMalloc( (void**)&(rcut_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc rcut_gpu: " ); cudasafe(cudaMalloc( (void**)&(anglecos_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc anglecos_gpu: " ); cudasafe(cudaMalloc( (void**)&(anglesin_gpu), nhalos*sizeof(type_t)),"Gradient2gpu.cu : Alloc anglesin_gpu: " ); cudasafe(cudaMalloc( (void**)&(frame_gpu), sizeof(grid_param)),"Gradient2gpu.cu : Alloc frame_gpu: " ); cudasafe(cudaMalloc( (void**)&(grid_grad2_a_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_a_gpu: " ); cudasafe(cudaMalloc( (void**)&(grid_grad2_b_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_b_gpu: " ); cudasafe(cudaMalloc( (void**)&(grid_grad2_c_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_c_gpu: " ); cudasafe(cudaMalloc( (void**)&(grid_grad2_d_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc source_d_gpu: " ); cudasafe(cudaMalloc( (void**)&(map_gpu), (nbgridcells_x) * (nbgridcells_y) *sizeof(type_t)),"Gradient2gpu.cu : Alloc map: " ); // Copy values to the GPU // cudasafe(cudaMemcpy(type_gpu,lens->type , nhalos*sizeof(int),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy type_gpu: " ); cudasafe(cudaMemcpy(lens_x_gpu,lens->position_x , nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy x_gpu: " ); cudasafe(cudaMemcpy(lens_y_gpu,lens->position_y , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy y_gpu: " ); cudasafe(cudaMemcpy(b0_gpu,lens->b0 , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2pu.cu : Copy b0_gpu: " ); cudasafe(cudaMemcpy(angle_gpu,lens->ellipticity_angle , nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy angle_gpu: " ); cudasafe(cudaMemcpy(epot_gpu, lens->ellipticity_potential, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy epot_gpu: " ); cudasafe(cudaMemcpy(rcore_gpu, lens->rcore, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy rcore_gpu: " ); cudasafe(cudaMemcpy(rcut_gpu, lens->rcut, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy rcut_gpu: " ); cudasafe(cudaMemcpy(anglecos_gpu, lens->anglecos, nhalos*sizeof(type_t),cudaMemcpyHostToDevice ),"Gradient2gpu.cu : Copy anglecos: " ); cudasafe(cudaMemcpy(anglesin_gpu, lens->anglesin, nhalos*sizeof(type_t), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy anglesin: " ); cudasafe(cudaMemcpy(frame_gpu, frame, sizeof(grid_param), cudaMemcpyHostToDevice),"Gradient2gpu.cu : Copy fame_gpu: " ); // lens_gpu->type = type_gpu; lens_gpu->position_x = lens_x_gpu; lens_gpu->position_y = lens_y_gpu; lens_gpu->b0 = b0_gpu; lens_gpu->ellipticity_angle = angle_gpu; lens_gpu->ellipticity_potential = epot_gpu; lens_gpu->rcore = rcore_gpu; lens_gpu->rcut = rcut_gpu; lens_gpu->anglecos = anglecos_gpu; lens_gpu->anglesin = anglesin_gpu; // cudaMemcpy(lens_kernel, lens_gpu, sizeof(Potential_SOA), cudaMemcpyHostToDevice); // type_t time = -myseconds(); // module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(grid_grad2_a_gpu, grid_grad2_b_gpu, grid_grad2_c_gpu, grid_grad2_d_gpu, frame_gpu, lens_kernel, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart); // mapfunction(map_gpu,grid_grad2_a_gpu, grid_grad2_b_gpu, grid_grad2_c_gpu, grid_grad2_d_gpu,dl0s,z,mode_amp,nhalos,nbgridcells_x,nbgridcells_y); //amplif_grid_CPU_GPU(map_gpu,grid_grad2_a_gpu, grid_grad2_b_gpu, grid_grad2_c_gpu, grid_grad2_d_gpu,dl0s,z,mode_amp,nhalos,nbgridcells_x,nbgridcells_y); //cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU"); cudaDeviceSynchronize(); // cudasafe(cudaMemcpy( map, map_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost )," --- Gradient2gpu.cu : Copy source_a_gpu: " ); //cudasafe(cudaMemcpy( grid_grad2_b, grid_grad2_b_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost)," --- Gradient2gpu.cu : Copy source_b_gpu: " ); //cudasafe(cudaMemcpy( grid_grad2_c, grid_grad2_c_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost )," --- Gradient2gpu.cu : Copy source_c_gpu: " ); //cudasafe(cudaMemcpy( grid_grad2_d, grid_grad2_d_gpu, (nbgridcells_x)*(nbgridcells_y)*sizeof(type_t), cudaMemcpyDeviceToHost)," --- Gradient2gpu.cu : Copy source_d_gpu: " ); // time += myseconds(); std::cout << " kernel time = " << time << " s." << std::endl; //printf("-----> %f %f \n",grid_grad_x[Nx], grid_grad_y[Ny]); // Free GPU memory cudaFree(lens_gpu); cudaFree(type_gpu); cudaFree(lens_x_gpu); cudaFree(lens_y_gpu); cudaFree(b0_gpu); cudaFree(angle_gpu); cudaFree(epot_gpu); cudaFree(rcore_gpu); cudaFree(rcut_gpu); cudaFree(anglecos_gpu); cudaFree(anglesin_gpu); cudaFree(grid_grad2_a_gpu); cudaFree(grid_grad2_b_gpu); cudaFree(grid_grad2_c_gpu); cudaFree(grid_grad2_d_gpu); cudaFree(map_gpu); } void amplif_grid_CPU_GPU(type_t *map,type_t *grid_grad2_a,type_t *grid_grad2_b,type_t *grid_grad2_c,type_t *grid_grad2_d, type_t dl0s, type_t z,int mode_amp, int nhalos,int nbgridcells_x, int nbgridcells_y) { int GRID_SIZE_X = (nbgridcells_x + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks int GRID_SIZE_Y = (nbgridcells_y + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y; // //printf("grid_size_x = %d, grid_size_y = %d, nbgridcells_x = %d, nbgridcells_y = %d, istart = %d, jstart = %d (split)\n", GRID_SIZE_X, GRID_SIZE_Y, nbgridcells_x, nbgridcells_y, istart, jstart); // dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1); dim3 grid (GRID_SIZE_X , GRID_SIZE_Y); // - int count = nhalos; - //printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (double) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(double)); printf("nhalos = %d, size of shared memory = %lf (split)\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t)); // cudaMemset(map, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t)); // //module_potentialDerivatives_totalGradient_SOA_GPU<<>> (grid_grad_x, grid_grad_y, lens, frame, nhalos, nbgridcells_x); amplif_grid_GPU<<>> (map,grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d,dl0s,z,nbgridcells_x); cudasafe(cudaGetLastError(), "amplif_grid_CPU_GPU"); // cudaDeviceSynchronize(); printf("GPU kernel done...\n"); } __global__ void amplif_grid_GPU(type_t *ampli,type_t *grid_grad2_a,type_t *grid_grad2_b,type_t *grid_grad2_c,type_t *grid_grad2_d, type_t dl0s, type_t z,int nbgridcells) { - struct point image_point; - struct matrix grad, clumpgrad; // int col = blockIdx.x*blockDim.x + threadIdx.x; int row = blockIdx.y*blockDim.y + threadIdx.y; // if ((row < nbgridcells) && (col < nbgridcells)) { int index = row*nbgridcells + col; type_t kappa = (grid_grad2_a[index] + grid_grad2_c[index]) / 2.; ampli[index] = kappa; } } #if 0 void module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nhalos, type_t dx, type_t dy, int nbgridcells_x, int nbgridcells_y, int istart, int jstart) { int GRID_SIZE_X = (nbgridcells_x + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks int GRID_SIZE_Y = (nbgridcells_y + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y; // printf("grid_size_x = %d, grid_size_y = %d, nbgridcells_x = %d, nbgridcells_y = %d, istart = %d, jstart = %d (split)\n", GRID_SIZE_X, GRID_SIZE_Y, nbgridcells_x, nbgridcells_y, istart, jstart); // dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1); dim3 grid (GRID_SIZE_X , GRID_SIZE_Y); // int count = nhalos; //printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (double) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(double)); printf("nhalos = %d, size of shared memory = %lf (split)\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t)); // cudaMemset(grid_grad2_a, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t)); cudaMemset(grid_grad2_b, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t)); cudaMemset(grid_grad2_c, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t)); cudaMemset(grid_grad2_d, 0, nbgridcells_x*nbgridcells_y*sizeof(type_t)); // //module_potentialDerivatives_totalGradient_SOA_GPU<<>> (grid_grad_x, grid_grad_y, lens, frame, nhalos, nbgridcells_x); module_potentialDerivatives_totalGradient2_SOA_GPU<<>> (grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d, lens_gpu, frame, nhalos, dx, dy, nbgridcells_x, nbgridcells_y, istart, jstart); cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU"); // cudaDeviceSynchronize(); printf("GPU kernel done...\n"); } // // // void module_potentialDerivatives_totalGradient2_SOA_CPU_GPU(type_t *grid_grad2_a, type_t *grid_grad2_b, type_t *grid_grad2_c, type_t *grid_grad2_d, const struct grid_param *frame, const struct Potential_SOA *lens_gpu, int nbgridcells, int nhalos) { // int GRID_SIZE_X = (nbgridcells + BLOCK_SIZE_X - 1)/BLOCK_SIZE_X; // number of blocks int GRID_SIZE_Y = (nbgridcells + BLOCK_SIZE_Y - 1)/BLOCK_SIZE_Y; // type_t* timer = (type_t *) malloc((int) nbgridcells*nbgridcells*sizeof(type_t)); type_t* dtimer; // dim3 threads(BLOCK_SIZE_X, BLOCK_SIZE_Y/1); dim3 grid (GRID_SIZE_X , GRID_SIZE_Y); // int count = nhalos; //printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*nbgridcells/BLOCK_SIZE_Y)*sizeof(type_t)); printf("nhalos = %d, size of shared memory = %lf\n", nhalos, (type_t) (8*nhalos + BLOCK_SIZE_X*BLOCK_SIZE_Y)*sizeof(type_t)); // cudaMemset(grid_grad2_a, 0, nbgridcells*nbgridcells*sizeof(type_t)); cudaMemset(grid_grad2_b, 0, nbgridcells*nbgridcells*sizeof(type_t)); cudaMemset(grid_grad2_c, 0, nbgridcells*nbgridcells*sizeof(type_t)); cudaMemset(grid_grad2_d, 0, nbgridcells*nbgridcells*sizeof(type_t)); // //module_potentialDerivatives_totalGradient2_SOA_GPU<<>> (grid_grad2_a, grid_grad2_b,grid_grad2_c, grid_grad2_d, lens_gpu, frame, nbgridcells, nhalos); cudasafe(cudaGetLastError(), "module_potentialDerivative_totalGradient_SOA_CPU_GPU_8_SOA_GPU"); // cudaDeviceSynchronize(); printf("GPU kernel done...\n"); } #endif diff --git a/src/module_cosmodistances.cpp b/src/module_cosmodistances.cpp index a5a0843..7b5582c 100644 --- a/src/module_cosmodistances.cpp +++ b/src/module_cosmodistances.cpp @@ -1,371 +1,371 @@ /** * @file module_cosmodistances.cpp * @Author Thomas Jalabert, EPFL (me@example.com) * @date July 2015 * @version 0,1 * @brief Library for the computation of cosmological ratios * * compute the cosmological ratio of the distances between the lens and the source and the lens and the observer * */ /// include header file #include #include #include #include #include #include "module_cosmodistances.hpp" #include "gsl/gsl_integration.h" // Declare static functions that will only be used in this module // The functions are defined further below static type_t module_cosmodistances_cosmo_root(type_t z,cosmo_param C); static type_t module_cosmodistances_sk(type_t x, type_t k); static type_t module_cosmodistances_chi1(type_t z,cosmo_param C); static type_t module_cosmodistances_chi2(type_t z1, type_t z2,cosmo_param C); static type_t module_cosmodistances_chiz(type_t z,cosmo_param C); static type_t module_cosmodistances_integral_chiz_ab(type_t a, type_t b,cosmo_param C); void module_cosmodistances_relativecoordinates_XY( type_t *x, type_t *y, int iref, type_t ref_ra, type_t ref_dec ) { type_t DTR=acos(-1.)/180.; // Convert the input values to absolute WCS coordinates if ( iref == 1 || iref == 3 ) { *x /= -3600.*cos(ref_dec * DTR); *x += ref_ra; *y /= 3600.; *y += ref_dec; } else if ( iref == 2 ) // image coordinates { *x += ref_ra; *y += ref_dec; } } // Function defintions //========================================================================================================== /** @brief Calculates ratio distance_(lens-source)/distance_(source) * Calculates ratio distance_(lens-source)/distance_(source) * @param nsetofimages number of set of images * @param nImagesSet set of images * @param z_lens redshift of lens * @param source sources * @param cosmoratio variable where result is stored * @param cosmopar cosmological parameter */ void module_cosmodistances_lensSourceToSource( const int nsetofimages, int nImagesSet[], type_t z_lens, galaxy source[], type_t cosmoratio[], cosmo_param cosmopar){ //int imageCounter = 0; // Count the total number of images up to now for(int i=0; i= z2 ) return 0.; if (cosmopar.omegaX == 0.) { g1 = module_cosmodistances_cosmo_root(z1,cosmopar); g2 = module_cosmodistances_cosmo_root(z2,cosmopar); // Mattig relation for a De Sitter Universe return(2.*((1. - cosmopar.omegaM - g1*g2)*(g1 - g2)) / cosmopar.omegaM / cosmopar.omegaM / (1. + z1) / (1. + z2) / (1. + z2)); } else { if ( cosmopar.curvature != 0. ) return(module_cosmodistances_sk(module_cosmodistances_chi2(z1, z2,cosmopar)*sqrt(fabs(cosmopar.curvature)), cosmopar.curvature) / (1 + z2) / sqrt(fabs(cosmopar.curvature))); else return(module_cosmodistances_chi2(z1, z2,cosmopar) / (1 + z2)); } } /****************************************************************/ /** @brief Return the lens efficacity E=DA(LS) / DA(OS) * If zl > zs, return 0. * * @param zl redshift lens * @param zs redshift source * @param cosmopar cosmological parameter */ type_t module_cosmodistances_lensSourceToObserverSource(type_t zl, type_t zs, cosmo_param cosmopar) { if ( zl >= zs ){ printf("*******************\nWarning, a source is between the lens and the observer\n*******************\n"); printf("Zl: %f , ZS: %f \n", zl, zs); return (0.); } return (module_cosmodistances_objectObject(zl, zs, cosmopar) / module_cosmodistances_observerObject(zs, cosmopar)); } /** @brief Calculate square root * * @param z redshift * @param C cosmological parameter */ // Calculate square root static type_t module_cosmodistances_cosmo_root(type_t z,cosmo_param C) { return(sqrt(1. + C.omegaM*z)); } // Calculate S_k for cosmology /** @brief Calculate S_k for cosmology * * @param x * @param k curvature */ static type_t module_cosmodistances_sk(type_t x, type_t k) { if (k > 0) return(sin(x)); else if (k < 0) return(sinh(x)); else return(x); } /** @brief Return 1 / H(z) multiplied by H0 * chiz(z) / H0 = 1 / H(z) = 1/H0/(1+z)/sqrt( sum(i, Omega_i*(1+z)^(3*(w_i + 1) ) ) * * @param z redshift * @param C cosmological parameter */ static type_t module_cosmodistances_chiz(type_t z,cosmo_param C) { type_t x; type_t yy, yyy, y4; type_t r0, e1, e2, frac; x = 1 + z; switch (C.model) //TV CPL Model { case(1): x = -x * x * C.curvature + x * x * x * C.omegaM + C.omegaX * pow(x, 3 * (1 + C.wX + C.wa)) * exp(-3 * C.wa * z / x); break; case(2): //TV Cardassian (wx is q, wa is n) yy = pow ( (1.+C.curvature)/C.omegaM ,C.wX); yyy = (yy - 1.) * pow( x,3.*C.wX*(C.wa-1.) ); y4 = pow(1.+yyy,1./C.wX); x = -x * x * C.curvature + x * x * x * C.omegaM*y4; break; case(3): //TV Interacting DE Model (wa is delta) yy = C.omegaX*pow( x,3.*(1.+C.wX) ); yyy = ( C.omegaM/(C.wa+3.*C.wX) ) * ( C.wa*pow(x,3.*(1.+C.wX)) + 3.*C.wX*pow(x,(3.-C.wa)) ); x = -x * x * C.curvature + yy +yyy; break; case(4): //TV Holographic Ricci Scale with CPL r0 = C.omegaM/(1.-C.omegaM); e1 = (3./2.)*( (1.+r0+C.wX+4*C.wa)/(1.+r0+3.*C.wa) ); e2 = (-1./2.)*( (1.+r0-3.*C.wX)/(1.+r0+3.*C.wa) ) ; frac = ( 1.+r0+3.*C.wa*(x-1.)/x )/(1.+r0) ; yy = pow(x,e1); yyy = pow(frac,e2); x = (yy*yyy)*(yy*yyy); break; default: printf("ERROR: Unknown cosmological model %d\n", C.model); exit(-1); } if ( x <= 0 ) { printf("ERROR : H^2(z)<=0 produced (z,omegaM,omegaX,wX,wa) = (%.3lf,%.3lf,%.3lf,%.3lf,%.3lf)\n", z, C.omegaM, C.omegaX, C.wX, C.wa ); exit(-1); } return( 1. / sqrt(x) ); } /** @brief Return the proper distance Dp(0,z) divided by c/H0 * chi1(z) * c/H0 = Dp(0,z) = integral( 0, z, c*dz / H(z) ) * * @param z redshift * @param C cosmological parameter */ static type_t module_cosmodistances_chi1(type_t z,cosmo_param C) { type_t rez; rez = module_cosmodistances_integral_chiz_ab(0., z,C); return rez; } /** @brief Return the proper distance Dp(z1,z2) divided by c/H0 * chi2(z) * c/H0 = Dp(z1,z2) = integral( z1, z2, c*dz / H(z) ) * * @param zl redshift lens * @param zs redshift source * @param C cosmological parameter */ static type_t module_cosmodistances_chi2(type_t z1, type_t z2,cosmo_param C) { type_t rez; rez = module_cosmodistances_integral_chiz_ab(z1, z2,C); return rez; } /** @brief compute the integral of H0/H(z) by trapezes method * compute the integral of H0/H(z) by trapezes method * @param a,b, cosmologicalparameters cosmopar * * @param a * @param b * @param C cosmological parameter */ static double chiz_gsl(double z, void* param) { struct cosmo_param * cosmo = (struct cosmo_param *)param; return module_cosmodistances_chiz(z,*cosmo); } static type_t module_cosmodistances_integral_chiz_ab(type_t a, type_t b,cosmo_param C) { /* int i,nit; type_t res, epsilon=1.e-5; /// accuracy of the integration, slows down cosmo calculation hugely, investigate lenstool gsl (need e-6)? nit=(b-a)/epsilon; res=epsilon*(module_cosmodistances_chiz(a,C)+module_cosmodistances_chiz(b,C))/2.; for(i=1;i 0) // We have cleanlens mode { printf("DEBUG: Cleanlens D_LS/D_OS for first 2 sources: %lf, %lf. Cleanlens D_OS for first 2 sources: %lf, %lf.\n\n", cleanlensRatios_lensSourceToSource[0], cleanlensRatios_lensSourceToSource[1], cleanlens_observerSource[0], cleanlens_observerSource[1]); }; }; return 0; } diff --git a/src/module_readParameters.cpp b/src/module_readParameters.cpp index b40f609..ffce15a 100644 --- a/src/module_readParameters.cpp +++ b/src/module_readParameters.cpp @@ -1,2744 +1,2744 @@ /** * @file module_readParameters.cpp * @Author Christoph Schaefer, EPFL (christophernstrerne.schaefer@epfl.ch) * @date July 2015 * @version 0,1 * @brief read parameters, images, sources and any other relevant information * * read parameter file * */ // Include //=========================================================================================================== #include #include #include #include #include #include #include #include #include "module_readParameters.hpp" #include //=========================================================================================================== // Function definitions /** * * bayes.dat each # corresponds to one param available, it has to be in the same folder (can be changed) * * bayespot = array with bayes values * nparam = number of param available * nvalues = number of bayes potentials * */ void module_readParameters_preparebayes(int &nparam, int &nvalues){ nparam = 0; nvalues = 0; //Counting lines std::string line; std::ifstream IM("bayes.dat",std::ios::in); if ( IM ) { while( std::getline(IM,line) ) // Read every line { if ( strncmp(line.c_str(), "#", 1) == 0){ //Skipping commented lines nparam += 1; continue; } nvalues +=1; } } nvalues -= 1; //exclude last empty line //nparam -=1; // Nparam is excluded - std::cerr << "nvalues :" << nvalues << "nparam :" << nparam << std::endl; + //std::cerr << "nvalues :" << nvalues << "nparam :" << nparam << std::endl; IM.close(); } void module_readParameters_bayesmodels(double * bayespot, int nparam, int nvalues){ std::string line; std::ifstream IM("bayes.dat",std::ios::in); std::stringstream streamline; int j = 0; if ( IM ) { while( std::getline(IM,line) ) // Read every line { if ( strncmp(line.c_str(), "#", 1) == 0){ //Skipping commented lines continue; } streamline << line; for(int i = 0; i < nparam; i++){ streamline >> bayespot[j * nparam + i]; //IM >> bayespot[j * nparam + i]; std::cerr << bayespot[j * nparam + i] << " " ; } streamline.clear(); std::cerr << std::endl; j += 1; } } IM.close(); } /**setting potential using bayes[index] values. Does not support changing the bayes files config output defined by // Parameter constants in structure.h #define CX 0 #define CY 1 #define EPOT 2 #define EMASS 3 #define THETA 4 #define PHI 5 #define RC 6 #define B0 7 #define ALPHA 8 #define BETA 9 #define RCUT 10 #define MASSE 11 #define ZLENS 12 #define RCSLOPE 13 #define PMASS 14 */ void module_readParameters_setbayesmapmodels(const runmode_param* runmode, const cosmo_param* cosmology, const potentialoptimization* limit, potfile_param* potfile, Potential_SOA* lenses, double * bayespot, int nparam, int index){ int param_index = 2; double DTR=acos(-1.)/180.; /* 1 deg in rad = pi/180 */ for(int i = 0; i < runmode->nhalos; i++){ if(limit[i].position.x.block == 1){ lenses->position_x[i] = bayespot[index*nparam+param_index]; param_index++; } if(limit[i].position.y.block == 1){ lenses->position_y[i] = bayespot[index*nparam+param_index]; param_index++; } if(limit[i].ellipticity_potential.block == 1){ lenses->ellipticity_potential[i] = bayespot[index*nparam+param_index]; param_index++; } if(limit[i].ellipticity.block == 1){ lenses->ellipticity[i] = bayespot[index*nparam+param_index]; param_index++; } if(limit[i].ellipticity_angle.block == 1){ lenses->ellipticity_angle[i] = bayespot[index*nparam+param_index]* DTR; lenses->anglecos[i] = cos(lenses->ellipticity_angle[i]); lenses->anglesin[i] = sin(lenses->ellipticity_angle[i]); param_index++; } if(limit[i].rcore.block == 1){ lenses->rcore[i] = bayespot[index*nparam+param_index]; param_index++; } if(limit[i].vdisp.block == 1){ lenses->vdisp[i] = bayespot[index*nparam+param_index]; param_index++; } if(limit[i].rcut.block == 1){ lenses->rcut[i] = bayespot[index*nparam+param_index]; param_index++; } if(limit[i].z.block == 1){ lenses->z[i] = bayespot[index*nparam+param_index]; param_index++; } module_readParameters_calculatePotentialparameter_SOA(lenses, i); } //Skip redshift image optimisation if(runmode->potfile != 0){ param_index += runmode->N_z_param; - std::cerr << "N_z_param " << runmode->N_z_param << std::endl; - printf("DNDBSFB ircut %d\n",potfile->ircut); + //std::cerr << "N_z_param " << runmode->N_z_param << std::endl; + //printf("DNDBSFB ircut %d\n",potfile->ircut); //Start potfile updating if(potfile->ircut > 0){ potfile->cut1 = bayespot[index*nparam+param_index]; //printf("cur %f ircut %d\n",potfile->cut,potfile->ircut); //std::cerr << index*nparam+param_index_pot << " "<< bayespot[index*nparam+param_index_pot] << std::endl; param_index++; } //std::cerr << "param " << param_index_pot << std::endl; if(potfile->isigma > 0){ potfile->sigma1 = bayespot[index*nparam+param_index]; //std::cerr << index*nparam+param_index_pot << " "<< bayespot[index*nparam+param_index_pot] << std::endl; param_index++; } setScalingRelations(runmode,cosmology,potfile,lenses); } /* for(int i = runmode->nhalos; i < runmode->nhalos + runmode->npotfile; i++){ param_index_pot = param_index; //std::cerr << "param " << param_index_pot << std::endl; //std::cerr << "param " << param_index_pot << std::endl; module_readParameters_calculatePotentialparameter_SOA(lenses, i); }*/ //update potential parameters } /** @brief This module function reads the cosmology parameters from the parameter file * * This module function reads the cosmology parameters from the parameter file. The given pointer is * updated with the results. * Read an infile: The structure of the file is as follows: | . | . | . |cosmology | model x <---- this is a value | H0 x | . . | . . | end | . | . | finish * * @param Cosmology cosmology * @param infile path to file */ void module_readParameters_readCosmology(std::string infile, cosmo_param &Cosmology) { std::string first, second, third, line1, line2; Cosmology.model = 1; Cosmology.H0 = 50; Cosmology.h = 1.; Cosmology.omegaM = 1.; Cosmology.omegaX = 0; Cosmology.wX = -1.; Cosmology.wa = 0.; Cosmology.curvature = 0.; std::ifstream IN(infile.c_str(),std::ios::in); // open file if ( IN ) { std::getline(IN,line1); std::istringstream read1(line1); // create a stream for the line read1 >> first; while(strncmp(first.c_str(), "fini",4) != 0 ) // read line by line until finish is reached { if ( strncmp(first.c_str(), "cosmolog", 8) == 0){ // if the line contains information about cosmology std::getline(IN,line2); // read the line word by word std::istringstream read2(line2); read2 >> second >> third; while(strncmp(second.c_str(), "end",3) != 0) // Go ahead until "end" is reached { if ( !strcmp(second.c_str(), "model") ) // set model of universe { Cosmology.model=atoi(third.c_str()); } else if ( !strcmp(second.c_str(), "H0") ) // set Hubble constant { Cosmology.H0=atof(third.c_str()); Cosmology.h = Cosmology.H0 / 50.; } else if ( !strcmp(second.c_str(), "omegaM") || !strcmp(second.c_str(), "omega") ) // set density of matter { Cosmology.omegaM=atof(third.c_str()); } else if ( !strcmp(second.c_str(), "omegaX") || !strcmp(second.c_str(), "lambda") ) // set cosmological constant { Cosmology.omegaX=atof(third.c_str()); } else if ( !strcmp(second.c_str(), "wX") || !strcmp(second.c_str(), "q") || !strcmp(second.c_str(), "w0") ) // set "q" for Model 2, "wX" for Model 3, "w0" for Model 4 { Cosmology.wX=atof(third.c_str()); } else if ( !strcmp(second.c_str(), "wa") || !strcmp(second.c_str(), "n") || !strcmp(second.c_str(), "delta") || !strcmp(second.c_str(), "w1") ) // set "n" for Model 2, "delta" for model 3, "w1" for model 4 { Cosmology.wa=atof(third.c_str()); } else if ( !strcmp(second.c_str(), "omegaK") ) // set universe curvature { Cosmology.curvature=atof(third.c_str()); } // read next line std::getline(IN,line2); std::istringstream read2(line2); read2 >> second >> third; } // if a flat Universe if ( Cosmology.curvature == 0. ) { Cosmology.omegaX = 1 - Cosmology.omegaM; } else Cosmology.curvature = Cosmology.omegaM + Cosmology.omegaX - 1; } // read next line std::getline(IN,line1); std::istringstream read1(line1); read1 >> first; } IN.close(); } else { fprintf(stderr, "ERROR: file %s not found\n", infile.c_str()); // Exit if file was not found exit(-1); } } /** @brief This module function reads the number of sources, arclets, etc. in the parameter file. We need to know this to allocate the memory * * Function to read the number of multiple images and clumps * Check if there is 1 limit for each clump set * The program reads the number of potentials and limits defined, and checks whether there are the same number * Then it opens the imfile to read the numbers of images and set of images * Reads also the size of the multiple images area, the size of a pixel, and the runmode. * * @param infile path to file * @param runmode runmode parameter */ void read_runmode(std::istream &IN, struct runmode_param *runmode){ std::string first, second, third, fourth, fifth, line1, line2; //sscanf variables double in1, in2; //%lf in1, then (type_t)in1 ; std::getline(IN,line2); std::istringstream read2(line2); read2 >> second >> third >> fourth; // Read in 4 words while (strncmp(second.c_str(), "end", 3)) // Read until we reach end { if ( !strcmp(second.c_str(), "nbgridcells") ) { sscanf(line2.c_str(), " %*s %d ", &runmode->nbgridcells); } if ( !strcmp(second.c_str(), "source") ) { char filename[FILENAME_SIZE]; sscanf(line2.c_str(), " %*s %d %s ", &runmode->source, &filename); runmode->sourfile = filename; } if ( !strcmp(second.c_str(), "image") ) { char filename[FILENAME_SIZE]; sscanf(line2.c_str(), " %*s %d %s ", &runmode->image, &filename); runmode->imagefile = filename; } if ( !strcmp(second.c_str(), "inverse") ) { sscanf(line2.c_str(), " %*s %d ", &runmode->inverse); } if ( !strcmp(second.c_str(), "mass") ) { sscanf(line2.c_str(), " %*s %d %d %lf %lf", &runmode->mass, &runmode->mass_gridcells, &in1, &in2); runmode->z_mass = (type_t)in1; runmode->z_mass_s =(type_t)in2; } if ( !strcmp(second.c_str(), "poten") ) { sscanf(line2.c_str(), " %*s %d %d %lf", &runmode->potential, &runmode->pot_gridcells, &in1); runmode->z_pot = (type_t)in1; } if ( !strcmp(second.c_str(), "dpl") ) { sscanf(line2.c_str(), " %*s %d %d %lf", &runmode->dpl, &runmode->dpl_gridcells, &in1); runmode->z_dpl = (type_t)in1; } if ( !strcmp(second.c_str(), "grid") ) { sscanf(line2.c_str(), " %*s %d %d %lf", &runmode->grid, &runmode->gridcells, &in1); runmode->zgrid = (type_t)in1; } if ( !strcmp(second.c_str(), "ampli") ) { sscanf(line2.c_str(), " %*s %d %d %lf", &runmode->amplif, &runmode->amplif_gridcells, &in1); runmode->z_amplif = (type_t)in1; //std::cerr<ampli << << <arclet = 1; // Not supported yet } if (!strcmp(second.c_str(), "reference")) { sscanf(line2.c_str(), "%*s %*d %lf %lf", &in1, &in2); runmode->ref_ra = (type_t)in1; runmode->ref_dec =(type_t)in2; //std::cerr << line2 << std::endl; std::cout << "Reference: Ra " << runmode->ref_ra << " Dec:" << runmode->ref_dec <time); } if( !strncmp(second.c_str(), "Debug",5) ) // Read in if we are in debug mode { runmode->debug = 1; } // Read the next line std::getline(IN,line2); std::istringstream read2(line2); read2 >> second >> third; } } void read_runmode_potential(std::istream &IN, int &numberPotentials){ std::string first, second, third, fourth, fifth, line1, line2; numberPotentials += 1; std::getline(IN,line2); std::istringstream read2(line2); double z(0); read2 >> second >> third; //std::cout << second << third << std::endl; while (strncmp(second.c_str(), "end", 3)) // Read until we reach end { if (!strcmp(second.c_str(), "z_lens")) // Get redshift { z=atof(third.c_str()); } // Read the next line std::getline(IN,line2); std::istringstream read2(line2); read2 >> second >> third; } // Check if redshift from current halo was initialized if ( z == 0. ) { fprintf(stderr, "ERROR: A redshift is not defined for a potential \n"); exit(-1); } } void read_runmode_image(std::istream &IN, struct runmode_param *runmode){ std::string first, second, third, fourth, fifth, line1, line2; std::getline(IN,line2); std::istringstream read2(line2); double z(0); read2 >> second >> third; while (strncmp(second.c_str(), "end", 3)) // Read until we reach end { if ( !strcmp(second.c_str(), "multfile") ) { char filename[FILENAME_SIZE]; sscanf(line2.c_str(), " %*s %d %s ", &runmode->multi, &filename); runmode->imagefile = filename; } if ( !strcmp(second.c_str(), "z_m_limit") ) { runmode->N_z_param += 1 ; } //std::cout << runmode->multi << runmode->imagefile << std::endl; // Read the next line std::getline(IN,line2); std::istringstream read2(line2); read2 >> second >> third; } } void read_runmode_potfile(std::istream &IN, struct runmode_param *runmode){ std::string first, second, third, fourth, fifth, line1, line2; runmode->potfile = 1; std::getline(IN,line2); std::istringstream read2(line2); read2 >> second >> third >> fourth; // Read in 4 words while (strncmp(second.c_str(), "end", 3)) // Read until we reach end { if ( !strcmp(second.c_str(), "filein") ) { //std::cerr<< third << " " << fourth << " " << runmode->potfilename << std::endl; runmode->potfilename = fourth; //std::cerr<< line2 << runmode->potfilename; break; } // Read the next line std::getline(IN,line2); std::istringstream read2(line2); read2 >> second >> third; } } void read_runmode_countimages(struct runmode_param *runmode){ std::string line2; int old_j = 0; int j = 0; int imageIndex = 0; if (runmode->image == 1 or runmode->inverse == 1 or runmode->time >= 1 or runmode->multi >= 1){ std::string imageFile_name = runmode->imagefile; std::ifstream IM(imageFile_name.c_str(), std::ios::in); if ( IM ) { while( std::getline(IM,line2) ) // Read every line { if ( strncmp(line2.c_str(), "#", 1) == 0){ //Skipping commented lines continue; } else{ int j=atoi(line2.c_str()); if(j != old_j){ //If a new set has been reached, increase the nset iterator and update old j runmode->nsets +=1; old_j = j; } imageIndex++; } } } else{ fprintf(stderr, "ERROR: file %s not found\n", imageFile_name.c_str()); exit(-1); } runmode->nimagestot=imageIndex; IM.close(); } } void read_runmode_countsources(struct runmode_param *runmode){ std::string imageFile_name,line1, line2; if (runmode->source == 1 and runmode->image == 0 and runmode->multi == 0 ){ imageFile_name = runmode->sourfile; //printf("Source to image mode activated\n"); std::ifstream IM(imageFile_name.c_str(), std::ios::in); //printf(" Booo says hello again \n"); if ( IM ){ int i = 0; while( std::getline(IM,line1) ){ // Read until we reach the end i++; } runmode->nsets = i ; } else{ fprintf(stderr, "ERROR: file %s not found\n", imageFile_name.c_str()); exit(-1); } runmode->nimagestot= 0; // Important IM.close(); } } void read_runmode_countpotfile(struct runmode_param *runmode){ std::string imageFile_name,line1, line2; if (runmode->potfile == 1){ imageFile_name = runmode->potfilename; std::ifstream IM(imageFile_name.c_str(), std::ios::in); if ( IM ){ int i = 0; while( std::getline(IM,line1) ){ // Read until we reach the end if ( line1[0] == '#' ){ continue; } i++; } runmode->npotfile = i ; } else{ fprintf(stderr, "ERROR: file %s not found\n", imageFile_name.c_str()); exit(-1); } IM.close(); } } void module_readParameters_readRunmode(std::string infile, struct runmode_param *runmode) { std::string first, second, third, fourth, fifth, line1, line2; int Nsis(0), Npiemd(0); /// Set default values runmode->nbgridcells = 1000; runmode->source = 0; runmode->image = 0; runmode->N_z_param = 0; runmode->nsets = 0; runmode->nhalos = 0; runmode->multi = 0; runmode->mass = 0; runmode->mass_gridcells = 1000; runmode->z_mass = 0.4; runmode->z_mass_s = 0.8; runmode->potential = 0; runmode->pot_gridcells = 1000; runmode->potfile = 0; //weird seg fault due to this line, haven't figured out why runmode->npotfile = 0; runmode->z_pot = 0.8; runmode->dpl = 0; runmode->dpl_gridcells = 1000; runmode->z_dpl = 0.8; runmode->inverse = 0; runmode->arclet = 0; runmode->debug = 0; runmode->nimagestot = 0; runmode->nsets = 0; runmode->gridcells = 1000; //std::cerr << sizeof(*runmode) << std::endl; runmode->cline = 0; runmode->time = 0; runmode->inverse = 0; runmode->arclet = 0; runmode->ref_ra = 0; runmode->ref_dec = 0; int j=0; std::string imageFile_name; int imageIndex=0; int numberPotentials=0, numberLimits=0; /*************************** read nhalos, imfile_name, pixel_size, multipleimagesarea_size and runmode from configuration file *********************/ std::ifstream IN(infile.c_str(), std::ios::in); if ( IN ) { std::getline(IN,line1); std::istringstream read1(line1); // create a stream for the line read1 >> first; // Read the first word //std::cout<cline = 1; } else if (!strncmp(first.c_str(), "potfile", 7)) { read_runmode_potfile(IN,runmode); } // read the next line std::getline(IN,line1); std::istringstream read1(line1); read1 >> first; //std::cout<nhalos=numberPotentials; } else { fprintf(stderr, "ERROR: file %s not found\n", infile.c_str()); exit(-1); } // //getting nimage value (number of images), nset value (number of sources) and npotfile //if image or multi mode is activated get nimage and nset read_runmode_countimages(runmode); //if source mode is activated, get nset read_runmode_countsources(runmode); //if potfile mode, count number of potential in potfile read_runmode_countpotfile(runmode); std::cerr <<"nsets: " <nsets <<" nhalos: " << runmode->nhalos << " nimagestot: " << runmode->nimagestot << " npotfile: " << runmode->npotfile << " multi: " << runmode->multi<< std::endl; } /** @brief read the positions, redshifts and numbers of multiple images from the images file * * This module function reads in the strong lensing images * Output: image coordinates, image shapes (semi-minor, semi-major of ellipse and orientation angle), source redshifts, number of images per set * ** the images file must contain 1 x_center y_center major axis minor axis ellipticity angle redshift 1 . . . . . . 1 . . . . . . 2 . . . . . . 2 . . . . . . 2 . . . . . . 2 . . . . . . 2 . . . . . . . . . . . . . . . . . . . . . . . . . . . nsetofimages . . . . . . At each line we store in j the index of the set of images to store the redshifts and the number of images of each set At each line we add an images in the position of images' array * * @param runmode runmode parameter * @param image array where images will be stored * @param nImagesSet array where the number of images per Set will be stored */ /// read the positions, redshifts and numbers of multiple images from the images file void module_readParameters_readImages(const struct runmode_param *runmode, struct galaxy image[], int nImagesSet[]) { std::string second, line1; int imageIndex=0; int j=0; //cast variables double cast_x,cast_y,cast_a,cast_b,cast_theta,cast_z,cast_mag; for(int i=0; insets; i++){ nImagesSet[i]=0; } /*********initialisation of nimages array*********/ for(int i=0; inimagestot; i++){ /* Initialise here the variables of the images*/ image[i].center.x = image[i].center.y = 0; image[i].shape.a = image[i].shape.b = image[i].shape.theta = 0; image[i].redshift = 0; } //printf("imagefile :%d \n", runmode->imagefile.c_str // Read in images std::ifstream IM(runmode->imagefile.c_str(),std::ios::in); if ( IM ) { int i = 0; while( std::getline(IM,line1) ) // Read until we reach the end { // Read in the parameters, * means we read in a parameter but do not store it sscanf(line1.c_str(), "%d %lf %lf %lf %lf %lf %lf %lf", &j, &cast_x, &cast_y, &cast_a, &cast_b, &cast_theta, &cast_z, &cast_mag); //Casting image[i].center.x =(type_t)cast_x; image[i].center.y =(type_t)cast_y; image[i].shape.a =(type_t)cast_a; image[i].shape.b =(type_t)cast_b; image[i].shape.theta =(type_t)cast_theta; image[i].redshift =(type_t)cast_z; image[i].mag =(type_t)cast_mag; //Variables nImagesSet[j-1]++; // Increase the counter of the number of system for system with number j-1 by 1 imageIndex++; i++; } } else{ std::cout << "Error : file " << runmode->imagefile << " not found" << std::endl; exit(-1); } IM.close(); } /** @brief read the positions, redshifts and numbers of multiple sources from the sources file * * This module function reads in the strong lensing sources * Output: sources coordinates, sources shapes (semi-minor, semi-major of ellipse and orientation angle), source redshifts * * the images file must contain x_center y_center major axis minor axis ellipticity angle redshift 1 . . . . . . 2 . . . . . . 3 . . . . . . * * @param runmode runmode parameter * @param source array where sources will be stored */ /// read the positions, redshifts and numbers of multiple images from the images file void module_readParameters_readSources(struct runmode_param *runmode, struct galaxy source[]) { std::string second, line1; int j=0; //cast variables double cast_x,cast_y,cast_a,cast_b,cast_theta,cast_z,cast_mag; //Calculating nset std::ifstream IM(runmode->sourfile.c_str(),std::ios::in); if ( IM ){ int i = 0; while( std::getline(IM,line1) ){ // Read until we reach the end i++; } runmode->nsets = i ; } else{ std::cout << "Error : file " << runmode->sourfile << " not found" << std::endl; exit(-1); } IM.close(); /*********initialisation of nimages array*********/ for(int i=0; insets; i++){ /* Initialise here the variables of the images*/ source[i].center.x = source[i].center.y = 0; source[i].shape.a = source[i].shape.b = source[i].shape.theta = 0; source[i].redshift = 0; source[i].mag = 0; } std::ifstream IM2(runmode->sourfile.c_str(),std::ios::in); // Read in images if ( IM2 ) { int i = 0; while( std::getline(IM2,line1) ) // Read until we reach the end { // Read in the parameters, * means we read in a parameter but do not store it sscanf(line1.c_str(), "%d %lf %lf %lf %lf %lf %lf %lf", &j, &cast_x, &cast_y, &cast_a, &cast_b, &cast_theta, &cast_z, &cast_mag); //Casting source[i].center.x =(type_t)cast_x; source[i].center.y =(type_t)cast_y; source[i].shape.a =(type_t)cast_a; source[i].shape.b =(type_t)cast_b; source[i].shape.theta =(type_t)cast_theta; source[i].redshift =(type_t)cast_z; source[i].mag =(type_t)cast_mag; i++; } } else{ std::cout << "Error : file " << runmode->sourfile << " not found" << std::endl; exit(-1); } IM2.close(); } /** @brief This module function reads the critical and caustic line information *@param infile path to file * @param cline cline parameter variable */ #if 0 void module_readParameters_CriticCaustic(std::string infile, cline_param *cline){ std::string first, second, third, line1, line2; //cast variables double cast_1; //Default value initialiasation cline->nplan = 0; for(int i =0; i < NPZMAX; ++i){ cline->cz[i] = 0; cline->dos[i] = 0; cline->dls[i] = 0; cline->dlsds[i] = 0; } cline->limitLow = 1; cline->dmax = 1; cline->limitHigh = 10; cline->nbgridcells = 1000; std::ifstream IN(infile.c_str(), std::ios::in); if ( IN ){ while(std::getline(IN,line1)){ std::istringstream read1(line1); // create a stream for the line read1 >> first; if ( strncmp(first.c_str(), "cline", 5) == 0){ // Read in runmode information std::getline(IN,line2); std::istringstream read2(line2); read2 >> second >> third; // Read in 4 words while (strncmp(second.c_str(), "end", 3)) // Read until we reach end { if ( !strcmp(second.c_str(), "nplan") ){ sscanf(third.c_str(), "%d", &cline->nplan); if ( cline->nplan > NPZMAX ){ cline->nplan = NPZMAX; } int j = 0; while ( read2 >> third ) { sscanf(third.c_str(), "%lf", &cast_1); cline->cz[j] =(type_t)cast_1; //printf(" zf %f \n",cline->cz[j]); j++; } } if ( !strcmp(second.c_str(), "dmax") ) { sscanf(third.c_str(), "%lf", &cast_1); cline->dmax =(type_t)cast_1; cline->xmax = cline->dmax; cline->xmin = -cline->dmax; cline->ymax = cline->dmax; cline->ymin = -cline->dmax; } if ( !strcmp(second.c_str(), "pas") || !strcmp(second.c_str(), "step") || !strcmp(second.c_str(), "limitLow") ) { sscanf(third.c_str(), "%lf", &cast_1); cline->limitLow =(type_t)cast_1; } if ( !strcmp(second.c_str(), "limitHigh") ) { sscanf(third.c_str(), "%lf", &cast_1); cline->limitHigh =(type_t)cast_1; } if ( !strcmp(second.c_str(), "nbgridcells") ) { sscanf(third.c_str(), "%d", &cline->nbgridcells); } // Read the next line std::getline(IN,line2); std::istringstream read2(line2); read2 >> second >> third; } } } // closes while loop } // closes if(IN) IN.close(); } #endif /** @brief This module function reads the potfile information *@param infile path to file * @param potfile_param potfile parameter variable */ void module_readParameters_readpotfiles_param(std::string infile, potfile_param *potfile, cosmo_param cosmology){ std::string first, second, third, line1, line2; //cast variables double cast_1, cast_2; //Default value potfile initialiasation potfile->potid = 0; potfile->ftype = 0; potfile->type = 0; potfile->zlens = 0; potfile->mag0 = 0; potfile->isigma = 0; potfile->sigma = -1; potfile->core = -1.0; potfile->corekpc = -1; potfile->ircut = 0; potfile->cut1 = DBL_MAX; potfile->cut2 = 0; potfile->cutkpc1 = DBL_MAX; potfile->cutkpc2 = 0; potfile->islope = 0; potfile->slope1 = 0; potfile->slope2 = 0; potfile->npotfile = 0; potfile->ivdscat = 0; potfile->vdscat1 =0; potfile->vdscat2 = 0; potfile->ircutscat = 0; std::ifstream IN(infile.c_str(), std::ios::in); if ( IN ){ while(std::getline(IN,line1)){ std::istringstream read1(line1); // create a stream for the line read1 >> first; if ( strncmp(first.c_str(), "potfile", 7) == 0){ // Read in potfile information std::getline(IN,line2); std::istringstream read2(line2); read2 >> second >> third; // Read in 4 words while (strncmp(second.c_str(), "end", 3)) // Read until we reach end { if ( !strcmp(second.c_str(), "filein") ) { sscanf(line2.c_str(), " %*s %d %s ", &potfile->ftype, potfile->potfile); } else if ( !strcmp(second.c_str(), "type") ) { sscanf(third.c_str(), "%d", &potfile->type); } else if ( !strcmp(second.c_str(), "zlens") || !strcmp(second.c_str(), "z_lens")) { sscanf(third.c_str(), "%lf", &cast_1); potfile->zlens =(type_t)cast_1; } else if (!strcmp(second.c_str(), "mag0") || !strcmp(second.c_str(), "r200")) { sscanf(third.c_str(), "%lf", &cast_1); potfile->mag0 =(type_t)cast_1; } else if (!strcmp(second.c_str(), "select")) { sscanf(third.c_str(), "%d", &potfile->select); } else if (!strcmp(second.c_str(), "core")) { sscanf(third.c_str(), "%lf", &cast_1); potfile->core =(type_t)cast_1; } else if (!strcmp(second.c_str(), "corekpc")) { sscanf(third.c_str(), "%lf", &cast_1); potfile->corekpc =(type_t)cast_1; } else if (!strcmp(second.c_str(), "cut")) { sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ircut, &cast_1, &cast_2); potfile->cut1 =(type_t)cast_1; potfile->cut2 =(type_t)cast_2; } else if (!strcmp(second.c_str(), "cutkpc")) { sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ircut, &cast_1, &cast_2); potfile->cutkpc1 =(type_t)cast_1; potfile->cutkpc2 =(type_t)cast_2; } else if (!strcmp(second.c_str(), "slope") || !strcmp(second.c_str(), "m200slope")) { sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->islope, &cast_1, &cast_2); potfile->slope1 =(type_t)cast_1; potfile->slope2 =(type_t)cast_2; } else if (!strcmp(second.c_str(), "sigma")) { sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->isigma, &cast_1, &cast_2); potfile->sigma1 =(type_t)cast_1; potfile->sigma2 =(type_t)cast_2; } else if (!strcmp(second.c_str(), "vdslope") || !strcmp(second.c_str(), "c200slope")) { sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ivdslope, &cast_1, &cast_2); potfile->vdslope1 =(type_t)cast_1; potfile->vdslope2 =(type_t)cast_2; } else if (!strncmp(second.c_str(), "vdscat", 6)) { sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ivdscat, &cast_1, &cast_2); potfile->vdscat1 =(type_t)cast_1; potfile->vdscat2 =(type_t)cast_2; } else if (!strncmp(second.c_str(), "rcutscat", 8)) { sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ircutscat, &cast_1, &cast_2); potfile->rcutscat1 =(type_t)cast_1; potfile->rcutscat2 =(type_t)cast_2; } else if (!strcmp(second.c_str(), "a") || !strcmp(second.c_str(), "m200")) { sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ia, &cast_1, &cast_2); potfile->a1 =(type_t)cast_1; potfile->a2 =(type_t)cast_2; } else if (!strcmp(second.c_str(), "b") || !strcmp(second.c_str(), "c200")) { sscanf(line2.c_str(), " %*s %d%lf%lf", &potfile->ib, &cast_1, &cast_2); potfile->b1 =(type_t)cast_1; potfile->b2 =(type_t)cast_2; } // Read the next line std::getline(IN,line2); std::istringstream read2(line2); read2 >> second >> third; } } } // closes while loop } // closes if(IN) IN.close(); //Calculate vdisp, rcut and rcore //********************************************************************* // Set the Potfile current and limiting values //********************************************************************* if ( potfile->ftype <= 4 ) { // Scale potfile SIGMA potfile->sigma = potfile->sigma1; // ... and potfile RCUT if ( potfile->cut1 == DBL_MAX && potfile->cutkpc1 != DBL_MAX ) { potfile->cut1 = potfile->cutkpc1 / (d0 / cosmology.h * module_cosmodistances_observerObject(potfile->zlens,cosmology)); potfile->cut2 = potfile->cutkpc2 / (d0 / cosmology.h * module_cosmodistances_observerObject(potfile->zlens,cosmology)); } potfile->cut = potfile->cut1; // ... and potfile RCORE if ( potfile->core == -1.0 && potfile->corekpc != -1 ) potfile->core = potfile->corekpc / (d0 / cosmology.h * module_cosmodistances_observerObject(potfile->zlens,cosmology)); // ... and potfile RCUT SLOPE potfile->slope = potfile->slope1; // ... and potfile VDSLOPE potfile->vdslope = potfile->vdslope1; } } /** @brief This module function loads the potential from the potfile into a lens *@param infile path to file * @param cline cline parameter variable */ /* void module_readParameters_readpotfiles(const runmode_param *runmode, potfile_param *potfile, Potential *lens){ std::string first, second, line1; double cast_1, cast_2; double aa,bb; double DTR=acos(-1.)/180.; //cast variables double cast_x,cast_y,cast_theta,cast_lum,cast_mag; potfile->reference_ra = potfile->reference_dec = 0; std::cerr << "Ref pot: "<< potfile->reference_ra << " " << potfile->reference_dec << std::endl; // Read in potentials std::ifstream IM(potfile->potfile,std::ios::in); if ( IM ) { int i = runmode->nhalos; while( std::getline(IM,line1) ) // Read until we reach the end { std::istringstream read1(line1); // create a stream for the line read1 >> first; // Skip commented lines with # //std::cerr << first << std::endl; if (!strncmp(first.c_str(), "#REFERENCE", 10) ){ sscanf(line1.c_str(), " %*s %*d%lf%lf", &cast_1, &cast_2); potfile->reference_ra = (type_t) cast_1; potfile->reference_dec = (type_t) cast_2; std::cerr << "Ref potfiles: "<< potfile->reference_ra << " " << potfile->reference_dec << std::endl; continue; } else if ( line1[0] == '#' ){ continue; } // Default initialisation of clump lens[i].type = potfile->type; lens[i].z = potfile->zlens; lens[i].ellipticity_potential = lens[i].ellipticity = 0.; lens[i].alpha = 0.; lens[i].rcut = 0.; lens[i].rcore = 0.; lens[i].rscale = 0.; lens[i].mag = 0.; lens[i].lum = 0.; lens[i].vdisp = 0.; lens[i].position.x = lens[i].position.y = 0.; lens[i].ellipticity_angle = 0.; lens[i].weight = 0; lens[i].exponent = 0; lens[i].einasto_kappacritic = 0; // Read a line of the catalog if ( potfile->ftype == 1 || potfile->ftype == 3 ) { sscanf( line1.c_str(), "%s%lf%lf%lf%lf%lf%lf%lf", &lens[i].name, &cast_x, &cast_y, &aa, &bb, &cast_theta, &cast_mag, &cast_lum); lens[i].ellipticity = (type_t) (aa*aa-bb*bb)/(aa*aa+bb*bb); if ( lens[i].ellipticity < 0 ) { fprintf( stderr, "ERROR: The potfile clump %s has a negative ellipticity.\n", lens[i].name ); exit(-1); } //goto NEXT; } //Casting lens[i].position.x =(type_t)cast_x; lens[i].position.y =(type_t)cast_y; lens[i].theta =(type_t)cast_theta; lens[i].lum =(type_t)cast_lum; lens[i].mag =(type_t)cast_mag; //general parameters lens[i].vdisp = potfile->sigma; lens[i].rcore = potfile->core; lens[i].rcut = potfile->cut; lens[i].ellipticity_angle = lens[i].theta* DTR; //Calculate parameters like b0, potential ellipticity and anyother parameter depending on the profile module_readParameters_calculatePotentialparameter(&lens[i]); //Variables potfile->npotfile++; i++; } } else{ std::cout << "Error : file " << potfile->potfile << " not found" << std::endl; exit(-1); } IM.close(); } */ void module_readParameters_readpotfiles_SOA(const runmode_param *runmode, const cosmo_param *cosmology, potfile_param *potfile, Potential_SOA *lens){ std::string first, second, line1; double cast_1, cast_2; double aa,bb; double DTR=acos(-1.)/180.; /* 1 deg in rad = pi/180 */ //cast variables double cast_x,cast_y,cast_theta,cast_lum,cast_mag, cast_name; // Read in potentials std::ifstream IM(potfile->potfile,std::ios::in); if ( IM ) { int i = runmode->nhalos; while( std::getline(IM,line1) ) // Read until we reach the end { std::istringstream read1(line1); // create a stream for the line read1 >> first; // Skip commented lines with # - std::cerr << first << std::endl; + //std::cerr << first << std::endl; if (!strncmp(first.c_str(), "#REFERENCE", 10)){ sscanf(line1.c_str(), " %*s %d%lf%lf", &potfile->reference_mode, &cast_1, &cast_2); potfile->reference_ra = (type_t) cast_1; potfile->reference_dec = (type_t) cast_2; std::cerr << "Ref pot: "<< potfile->reference_ra << " " << potfile->reference_dec << std::endl; continue; } // Skip commented lines with # else if ( line1[0] == '#' ) continue; //std::cerr << "Turn " << i << std::endl; // Default initialisation of clump lens->type[i] = potfile->type; lens->z[i] = potfile->zlens; lens->ellipticity_potential[i] = lens->ellipticity[i] = 0.; lens->alpha[i] = 0.; lens->rcut[i] = 0.; lens->rcore[i] = 0.; lens->rscale[i] = 0.; lens->mag[i] = 0.; lens->lum[i] = 0.; lens->vdisp[i] = 0.; lens->position_x[i] = lens->position_y[i] = 0.; lens->ellipticity_angle[i] = 0.; lens->weight[i] = 0; lens->exponent[i] = 0; lens->einasto_kappacritic[i] = 0; //std::cerr << "Init finished "<< std::endl; // Read a line of the catalog if ( potfile->ftype == 1 || potfile->ftype == 3 ) { sscanf( line1.c_str(), "%lf%lf%lf%lf%lf%lf%lf%lf", &cast_name, &cast_x, &cast_y, &aa, &bb, &cast_theta, &cast_mag, &cast_lum); lens->ellipticity[i] = (type_t) (aa*aa-bb*bb)/(aa*aa+bb*bb); if ( lens->ellipticity[i] < 0 ) { fprintf( stderr, "ERROR: The potfile clump %d has a negative ellipticity.\n", i ); exit(-1); } //goto NEXT; } //Casting lens->name[i] =(type_t)cast_name; lens->position_x[i] =(type_t)cast_x; lens->position_y[i] =(type_t)cast_y; //module_cosmodistances_relativecoordinates_XY( &lens->position_x[i], &lens->position_y[i], potfile->reference_mode, potfile->reference_ra, potfile->reference_dec ); lens->theta[i] =(type_t)cast_theta; lens->lum[i] =(type_t)cast_lum; lens->mag[i] =(type_t)cast_mag; //general parameters //lens->vdisp[i] = potfile->sigma; //lens->rcore[i] = potfile->core; //lens->rcut[i] = potfile->cut; lens->ellipticity_angle[i] = lens->theta[i]* DTR; lens->anglecos[i] = cos(lens->ellipticity_angle[i]); lens->anglesin[i] = sin(lens->ellipticity_angle[i]); //Variables potfile->npotfile++; i++; } setScalingRelations(runmode,cosmology,potfile,lens); } else{ std::cout << "Error : file " << potfile->potfile << " not found" << std::endl; exit(-1); } IM.close(); } void setScalingRelations(const runmode_param *runmode, const cosmo_param *cosmology, potfile_param *pot, Potential_SOA* lenses){ //********************************************************************* // Check if the scaling relations are defined //********************************************************************* if ( pot->ftype == 1 ) { if ( pot->sigma1 == -1 ) { fprintf(stderr, "ERROR: potfile: sigma not defined\n"); exit(-1); } if ( pot->cutkpc1 == DBL_MAX && pot->cut1 == DBL_MAX ) { fprintf(stderr, "ERROR: potfile: cut length not defined\n"); exit(-1); } if ( pot->corekpc == -1 && pot->core == -1 ) { fprintf(stderr, "ERROR: potfile: core length not defined\n"); exit(-1); } } else{ fprintf(stderr, "ERROR: potfile: potfile type %d not supported\n", pot->ftype); exit(-1); } //********************************************************************* // Set the Potfile current and limiting values //********************************************************************* if ( pot->ftype <= 4 ) { // Scale potfile SIGMA pot->sigma = pot->sigma1; // ... and potfile RCUT if ( pot->cut1 == DBL_MAX && pot->cutkpc1 != DBL_MAX ) { pot->cut1 = pot->cutkpc1 / (d0 / cosmology->h * module_cosmodistances_observerObject(pot->zlens,*cosmology)); pot->cut2 = pot->cutkpc2 / (d0 / cosmology->h * module_cosmodistances_observerObject(pot->zlens,*cosmology)); } pot->cut = pot->cut1; // ... and potfile RCORE if ( pot->core == -1.0 && pot->corekpc != -1 ) pot->core = pot->corekpc / (d0 / cosmology->h * module_cosmodistances_observerObject(pot->zlens,*cosmology)); // ... and potfile RCUT SLOPE pot->slope = pot->slope1; // ... and potfile VDSLOPE pot->vdslope = pot->vdslope1; } // set potfile VDSCAT for all potfile scaling relations pot->vdscat = pot->vdscat1; // ... and potfile RCUTSCAT pot->rcutscat = pot->rcutscat1; for ( int i = runmode->nhalos ; i < runmode->npotfile+runmode->nhalos ; i++ ){ if ( lenses->mag[i] != 0 ){ lenses->rcore[i] = pot->core * pow(10., 0.4 * (pot->mag0 - lenses->mag[i]) / 2.);} /* * Scale the sigma and rcut of a potfile clump according to the potfile parameters */ // loop over the potfile clumps to scale if ( pot->ftype <= 4 ) { if ( lenses->mag[i] != 0 ) { lenses->vdisp[i] = pot->sigma * pow(10., 0.4 * (pot->mag0 - lenses->mag[i]) / pot->vdslope); /* The factor of 2 so that with slope1 = 4, we have * 2/slope1=1/2, then Brainerd, Blandford, Smail, 1996 */ lenses->rcut[i] = pot->cut * pow(10., 0.4 * (pot->mag0 - lenses->mag[i]) * 2. / pot->slope); } if ( pot->ivdscat != 0 ){ fprintf(stderr, "ERROR: potfile: ivdscat not supported yet\n"); exit(-1); } // Convert sigma to b0 //set_dynamics(i); if ( pot->ircutscat != 0 ){ fprintf(stderr, "ERROR: potfile: ircutscat not supported yet\n"); exit(-1); } } //Calculate parameters like b0, potential ellipticity and anyother parameter depending on the profile module_readParameters_calculatePotentialparameter_SOA(lenses,i); } } /** @brief read the information about arclets * !Not used! Will be reworked * This module function reads in the arclet images for weak lensing * @param Arclet file */ void module_readParameters_arclets(std::string arclets_filename, point arclets_position[], ellipse arclets_shape[], double arclets_redshift[]) { std::string second, line1; int j=0; //cast variables double cast_x,cast_y,cast_a,cast_b,cast_theta,cast_z; /******************** read the arclets file *******************/ /* the arclets file must contain id x_center y_center a b theta redshift line : 1 . . . . . . . 2 3 . . narclets */ std::ifstream IM(arclets_filename.c_str(),std::ios::in); if ( IM ) { while( std::getline(IM,line1)) // Read every line { // Read in parameters, * means we read the parameter but don't store it (*s) sscanf(line1.c_str(), "%*s %lf %lf %lf %lf %lf %lf", &cast_x, &cast_y, &cast_a, &cast_b, &cast_theta, &cast_z); //Casting arclets_position[j].x =(type_t)cast_x; arclets_position[j].y =(type_t)cast_y; arclets_shape[j].a =(type_t)cast_a; arclets_shape[j].b =(type_t)cast_b; arclets_shape[j].theta =(type_t)cast_theta; arclets_redshift[j] =(type_t)cast_z; j++; } IM.close(); } else { printf("Error : file %s not found\n",arclets_filename.c_str()); exit(-1); } } /** @brief This module function reads in if a parameter will be optimized by the MCMC or stay fixed. * * This module function reads in if a parameter will be optimized by the MCMC or stay fixed. * If it will be optimized, it specifies its minimum and maximum allowed values. Unless declared otherwise by the user, input values are fixed and won't be optimized. * * read an infile : | . | . |limit | x_center x x x x <--- these values contains : block min max accuracy | y_center x x x x if block=1 this is a free parameter, otherwise not | . . . . . min and max define the extremal value of this parameter | . . . . . accuracy is a criterium of convergence for the MCMC | end | . | . |limit | x_center x x x x | y_center x x x x | . . . . . | . . . . . | end | . | . |finish and fills the variables with these values * @param infile path to input file * @param host_potentialoptimization array where limits will be stored * @param nhalos number of mass distributions */ void module_readParameters_limit(std::string infile, struct potentialoptimization host_potentialoptimization[], int nhalos ) { std::string first, second, line1, line2; int i=0; //cast variables double cast_min,cast_max,cast_sigma; //double d1 = d0 / cosmology.h * module_cosmodistances_observerObject(lens_temp.z,cosmology); type_t DTR=acos(-1.)/180.; /* 1 deg in rad = pi/180 */ /*** initialize the block variables to zero (= not to be optimized) ***/ for(int index=0; index> first; if (!strncmp(first.c_str(), "limit", 5)) // Read the limits { while(std::getline(IN,line2)) { std::istringstream read2(line2); read2 >> second; // Read in 1 word if (strcmp(second.c_str(), "end") == 0) break; // stop reading at "end" and move to next potential limit section if (!strcmp(second.c_str(), "x_centre") || // Read in for x center !strcmp(second.c_str(), "x_center") ) { sscanf(line2.c_str(), "%*s %d %lf %lf %lf", &host_potentialoptimization[i].position.x.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].position.x.min =(type_t)cast_min; host_potentialoptimization[i].position.x.max =(type_t)cast_max; host_potentialoptimization[i].position.x.sigma =(type_t)cast_sigma; } else if (!strcmp(second.c_str(), "y_centre") || // Read in for y center !strcmp(second.c_str(), "y_center") ) { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].position.y.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].position.y.min =(type_t)cast_min; host_potentialoptimization[i].position.y.max =(type_t)cast_max; host_potentialoptimization[i].position.y.sigma =(type_t)cast_sigma; } else if ( !strcmp(second.c_str(), "v_disp") ) // Read in for ellipticity { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].vdisp.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].vdisp.min =(type_t)cast_min; host_potentialoptimization[i].vdisp.max =(type_t)cast_max; host_potentialoptimization[i].vdisp.sigma =(type_t)cast_sigma; } else if ( !strcmp(second.c_str(), "ellip_pot") ) // Read in for ellipticity { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].ellipticity_potential.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].ellipticity_potential.min =(type_t)cast_min; host_potentialoptimization[i].ellipticity_potential.max =(type_t)cast_max; host_potentialoptimization[i].ellipticity_potential.sigma =(type_t)cast_sigma; } else if ( !strcmp(second.c_str(), "ellipticitymass") || !strcmp(second.c_str(), "ellipticity") || !strcmp(second.c_str(), "ellipticite") ) // Read in for ellipticity { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].ellipticity.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].ellipticity.min =(type_t)cast_min; host_potentialoptimization[i].ellipticity.max =(type_t)cast_max; host_potentialoptimization[i].ellipticity.sigma =(type_t)cast_sigma; } else if (!strcmp(second.c_str(), "ellipticity_angle") || !strcmp(second.c_str(), "angle_pos")) // Read in for ellipticity angle { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].ellipticity_angle.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].ellipticity_angle.min =(type_t)cast_min; host_potentialoptimization[i].ellipticity_angle.max =(type_t)cast_max; host_potentialoptimization[i].ellipticity_angle.sigma =(type_t)cast_sigma; host_potentialoptimization[i].ellipticity_angle.min *= DTR; host_potentialoptimization[i].ellipticity_angle.max *= DTR; host_potentialoptimization[i].ellipticity_angle.sigma *= DTR; } else if ( !strcmp(second.c_str(), "rcut") || !strcmp(second.c_str(), "cut_radius")) // Read in for r cut { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].rcut.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].rcut.min =(type_t)cast_min; host_potentialoptimization[i].rcut.max =(type_t)cast_max; host_potentialoptimization[i].rcut.sigma =(type_t)cast_sigma; } else if ( !strcmp(second.c_str(), "cut_radius_kpc")) // Read in for r cut { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].rcut.block, &cast_min, &cast_max, &cast_sigma); } else if ( !strcmp(second.c_str(), "rcore") || !strcmp(second.c_str(), "core_radius")) // Read in for core radius { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].rcore.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].rcore.min =(type_t)cast_min; host_potentialoptimization[i].rcore.max =(type_t)cast_max; host_potentialoptimization[i].rcore.sigma =(type_t)cast_sigma; } else if ( !strcmp(second.c_str(), "NFW_rs") || // Read in for NFW scale radius !strcmp(second.c_str(), "rscale") ) { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].rscale.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].rscale.min =(type_t)cast_min; host_potentialoptimization[i].rscale.max =(type_t)cast_max; host_potentialoptimization[i].rscale.sigma =(type_t)cast_sigma; } else if ( !strcmp(second.c_str(), "exponent") ) // Read in for exponent { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].exponent.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].exponent.min =(type_t)cast_min; host_potentialoptimization[i].exponent.max =(type_t)cast_max; host_potentialoptimization[i].exponent.sigma =(type_t)cast_sigma; } else if ( !strcmp(second.c_str(), "alpha") ) // Read in for alpha { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].alpha.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].alpha.min =(type_t)cast_min; host_potentialoptimization[i].alpha.max =(type_t)cast_max; host_potentialoptimization[i].alpha.sigma =(type_t)cast_sigma; } else if ( !strcmp(second.c_str(), "einasto_kappacritic") ) // Read in for critical kappa { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].einasto_kappacritic.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].einasto_kappacritic.min =(type_t)cast_min; host_potentialoptimization[i].einasto_kappacritic.max =(type_t)cast_max; host_potentialoptimization[i].einasto_kappacritic.sigma =(type_t)cast_sigma; } else if (!strcmp(second.c_str(), "virial_mass") || // Read in for virial mass !strcmp(second.c_str(), "masse") || !strcmp(second.c_str(), "m200") || !strcmp(second.c_str(), "mass")) { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].weight.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].weight.min =(type_t)cast_min; host_potentialoptimization[i].weight.max =(type_t)cast_max; host_potentialoptimization[i].weight.sigma =(type_t)cast_sigma; } else if ( !strcmp(second.c_str(), "z_lens") ) // Read in for redshift { sscanf(line2.c_str(), "%*s%d%lf%lf%lf", &host_potentialoptimization[i].z.block, &cast_min, &cast_max, &cast_sigma); host_potentialoptimization[i].z.min =(type_t)cast_min; host_potentialoptimization[i].z.max =(type_t)cast_max; host_potentialoptimization[i].z.sigma =(type_t)cast_sigma; } } // end of inner while loop i++; // Move to next potential } } } IN.close(); } /** @brief This module function reads in the potential form and its parameters (e.g. NFW). * * @param infile path to input file * @param lens array where mass distributions will be stored * @param nhalos number of mass distributions */ void module_readParameters_Potential(std::string infile, Potential lens[], int nhalos) { double DTR=acos(-1.)/180.; /* 1 deg in rad = pi/180 */ Potential *ilens; std::string first, second, third, line1, line2; int i=0; std::ifstream IN(infile.c_str(), std::ios::in); if ( IN ) { while(std::getline(IN,line1)) { std::istringstream read1(line1); // create a stream for the line read1 >> first; if (!strncmp(first.c_str(), "potent", 6)) // Read in potential { /***********************************************/ ilens = &lens[i]; ilens->position.x = ilens->position.y = 0.; ilens->ellipticity = 0; ilens->ellipticity_potential = 0.; ilens->ellipticity_angle = 0.; ilens->rcut = 0.; ilens->rcore = 0; ilens->weight = 0; ilens->rscale = 0; ilens->exponent = 0; ilens->alpha = 0.; ilens->einasto_kappacritic = 0; ilens->z = 0; while(std::getline(IN,line2)) { std::istringstream read2(line2); read2 >> second >> third; if (strcmp(second.c_str(), "end") == 0) // Move to next potential and initialize it { if ( ilens->z == 0. ) // Check if redshift from current halo was initialized { fprintf(stderr, "ERROR: No redshift defined for potential %d\n", i); exit(-1); } break; // Break while loop and move to next potential } // Read in values if ( !strcmp(second.c_str(), "profil") || // Get profile !strcmp(second.c_str(), "profile") ) { if(!strcmp(third.c_str(), "PIEMD") || !strcmp(third.c_str(), "1") ) { ilens->type=1; strcpy(ilens->type_name,"PIEMD");//ilens->type_name="PIEMD"; } if(!strcmp(third.c_str(), "NFW") || !strcmp(third.c_str(), "2") ) { ilens->type=2; strcpy(ilens->type_name,"NFW");//ilens->type_name="NFW"; } if(!strcmp(third.c_str(), "SIES") || !strcmp(third.c_str(), "3") ) { ilens->type=3; strcpy(ilens->type_name,"SIES");//ilens->type_name="SIES"; } if(!strncmp(third.c_str(), "point", 5) || !strcmp(third.c_str(), "4") ) { ilens->type=4; strcpy(ilens->type_name,"point");//ilens->type_name="point"; } if(!strcmp(third.c_str(), "SIE") || !strcmp(third.c_str(), "5") ) { ilens->type=5; strcpy(ilens->type_name,"SIE");//ilens->type_name="point"; } if(!strcmp(third.c_str(), "8") ) { ilens->type=8; strcpy(ilens->type_name,"PIEMD1");//ilens->type_name="point"; } if(!strcmp(third.c_str(), "81") ) { ilens->type=81; strcpy(ilens->type_name,"PIEMD81");//ilens->type_name="point"; } } else if (!strcmp(second.c_str(), "name")) // Get name of lens { sscanf(third.c_str(),"%s",ilens->name); } else if (!strcmp(second.c_str(), "x_centre") || // Get x center !strcmp(second.c_str(), "x_center") ) { ilens->position.x=atof(third.c_str()); //std::cout << "PositionX : " << std::setprecision(15) << ilens->position.x << std::endl; } else if (!strcmp(second.c_str(), "y_centre") || // Get y center !strcmp(second.c_str(), "y_center") ) { ilens->position.y=atof(third.c_str()); } else if ( !strcmp(second.c_str(), "ellipticitymass") || !strcmp(second.c_str(), "ellipticity") ) // Get ellipticity { ilens->ellipticity=atof(third.c_str()); //ilens->ellipticity=ilens->ellipticity/3.; } else if (!strcmp(second.c_str(), "ellipticity_angle") || !strcmp(second.c_str(), "angle_pos")) // Get ellipticity angle { ilens->ellipticity_angle=atof(third.c_str()); ilens->ellipticity_angle *= DTR; } else if ( !strcmp(second.c_str(), "rcore") || !strcmp(second.c_str(), "core_radius")) // Get core radius { ilens->rcore=atof(third.c_str()); } else if (!strcmp(second.c_str(), "rcut") || !strcmp(second.c_str(), "cut_radius")) // Get cut radius { ilens->rcut=atof(third.c_str()); } else if (!strcmp(second.c_str(), "NFW_rs") || // Get scale radius of NFW !strcmp(second.c_str(), "rscale")) { ilens->rscale=atof(third.c_str()); } else if (!strcmp(second.c_str(), "exponent") ) // Get exponent { ilens->exponent=atof(third.c_str()); } else if (!strcmp(second.c_str(), "alpha") ) // Get alpha { ilens->alpha=atof(third.c_str()); } else if (!strcmp(second.c_str(), "einasto_kappacritic") || // Get critical kappa !strcmp(second.c_str(), "kappacritic")) { ilens->einasto_kappacritic=atof(third.c_str()); } else if (!strcmp(second.c_str(), "z_lens")) // Get redshift { ilens->z=atof(third.c_str()); } else if (!strcmp(second.c_str(), "v_disp")) // Get Dispersion velocity { ilens->vdisp=atof(third.c_str()); } else if ( !strncmp(second.c_str(), "virial_mass", 6) || // Get virial mass !strcmp(second.c_str(), "masse") || !strcmp(second.c_str(), "m200") || !strcmp(second.c_str(), "mass") ) { ilens->weight=atof(third.c_str()); } } // closes inner while loop //Calculate parameters like b0, potential ellipticity and anyother parameter depending on the profile module_readParameters_calculatePotentialparameter(ilens); i++; // Set counter to next potential } // closes if loop } // closes while loop if(i==0){ printf("Parameter potential not found in the file %s",infile.c_str()); exit(-1); } if(i>1){ for(int j=1; j> first; if (!strncmp(first.c_str(), "potent", 6)) // Read in potential { while(std::getline(IN,line2)) { std::istringstream read2(line2); read2 >> second >> third; if (strcmp(second.c_str(), "end") == 0) // Move to next potential and initialize it { break; // Break while loop and move to next potential } if ( !strcmp(second.c_str(), "profil") || // Get profile !strcmp(second.c_str(), "profile") ) { if(!strcmp(third.c_str(), "PIEMD") || !strcmp(third.c_str(), "1") ) { ind=atoi(third.c_str()); N_type[ind] += 1; } if(!strcmp(third.c_str(), "NFW") || !strcmp(third.c_str(), "2") ) { ind=atoi(third.c_str()); N_type[ind] += 1; } if(!strcmp(third.c_str(), "SIES") || !strcmp(third.c_str(), "3") ) { ind=atoi(third.c_str()); N_type[ind] += 1; } if(!strncmp(third.c_str(), "point", 5) || !strcmp(third.c_str(), "4") ) { ind=atoi(third.c_str()); N_type[ind] += 1; } if(!strcmp(third.c_str(), "SIE") || !strcmp(third.c_str(), "5") ) { ind=atoi(third.c_str()); N_type[ind] += 1; } if(!strcmp(third.c_str(), "8") ) //PIEMD { ind=atoi(third.c_str()); N_type[ind] += 1; } if(!strcmp(third.c_str(), "81") ) //PIEMD81 { ind=atoi(third.c_str()); N_type[ind] += 1; //std::cerr << "Type First: " << ind << std::endl; } } } } } } IN.close(); IN.clear(); IN.open(infile.c_str(), std::ios::in); } void module_readParameters_PotentialSOA_direct(std::string infile, Potential_SOA *lens_SOA, int nhalos, int npotfiles, cosmo_param cosmology){ double DTR=acos(-1.)/180.; /* 1 deg in rad = pi/180 */ double core_radius_kpc = 0.; double cut_radius_kpc = 0.; int N_type[100]; int Indice_type[100]; int ind; Potential lens_temp; //Init of lens_SOA lens_SOA->name = new type_t[nhalos+npotfiles]; lens_SOA->type = new int[nhalos+npotfiles]; lens_SOA->position_x = new type_t[nhalos+npotfiles]; lens_SOA->position_y = new type_t[nhalos+npotfiles]; lens_SOA->b0 = new type_t[nhalos+npotfiles]; lens_SOA->vdisp = new type_t[nhalos+npotfiles]; lens_SOA->ellipticity_angle = new type_t[nhalos+npotfiles]; lens_SOA->ellipticity = new type_t[nhalos+npotfiles]; lens_SOA->ellipticity_potential = new type_t[nhalos+npotfiles]; lens_SOA->rcore = new type_t[nhalos+npotfiles]; lens_SOA->rcut = new type_t[nhalos+npotfiles]; lens_SOA->z = new type_t[nhalos+npotfiles]; lens_SOA->anglecos = new type_t[nhalos+npotfiles]; lens_SOA->anglesin = new type_t[nhalos+npotfiles]; lens_SOA->mag = new type_t[nhalos+npotfiles]; lens_SOA->lum = new type_t[nhalos+npotfiles]; lens_SOA->weight = new type_t[nhalos+npotfiles]; lens_SOA->exponent = new type_t[nhalos+npotfiles]; lens_SOA->einasto_kappacritic = new type_t[nhalos+npotfiles]; lens_SOA->rscale = new type_t[nhalos+npotfiles]; lens_SOA->alpha = new type_t[nhalos+npotfiles]; lens_SOA->theta = new type_t[nhalos+npotfiles]; //Init of N_types and Indice_type (Number of lenses of a certain type) for(int i=0;i < 100; ++i){ N_type[i] = 0; Indice_type[i] = 0; } //First sweep through the runmode file to find N_type (number of types) read_potentialSOA_ntypes(infile,N_type); //Calcuting starting points for each type in lens array for(int i=1;i < 100; ++i){ Indice_type[i] = N_type[i]+Indice_type[i-1]; //printf("%d %d \n ",N_type[i], Indice_type[i]); } std::string first, second, third, line1, line2; std::ifstream IN(infile.c_str(), std::ios::in); if(IN){ while(std::getline(IN,line1)) { std::istringstream read1(line1); // create a stream for the line read1 >> first; if (!strncmp(first.c_str(), "potent", 6)) // Read in potential { lens_temp.position.x = lens_temp.position.y = 0.; lens_temp.ellipticity = 0; lens_temp.ellipticity_potential = 0.; lens_temp.ellipticity_angle = 0.; lens_temp.vdisp = 0.; lens_temp.rcut = 0.; lens_temp.rcore = 0; core_radius_kpc = 0.; cut_radius_kpc = 0; lens_temp.weight = 0; lens_temp.rscale = 0; lens_temp.exponent = 0; lens_temp.alpha = 0.; lens_temp.einasto_kappacritic = 0; lens_temp.z = 0; while(std::getline(IN,line2)) { //Init temp potential std::istringstream read2(line2); read2 >> second >> third; //std::cerr << line2 << std::endl; if (strcmp(second.c_str(), "end") == 0) // Move to next potential and initialize it { if ( lens_temp.z == 0. ) // Check if redshift from current halo was initialized { fprintf(stderr, "ERROR: No redshift defined for potential at position x: %f and y: %f \n", lens_temp.position.x , lens_temp.position.y); exit(-1); } break; // Break while loop and move to next potential } //Find profile if ( !strcmp(second.c_str(), "profil") || // Get profile !strcmp(second.c_str(), "profile") ) { lens_temp.type=atoi(third.c_str()); //std::cerr << lens_temp.type << std::endl; } else if (!strcmp(second.c_str(), "name")) // Get name of lens { sscanf(third.c_str(),"%s",lens_temp.name); } else if (!strcmp(second.c_str(), "x_centre") || // Get x center !strcmp(second.c_str(), "x_center") ) { lens_temp.position.x=atof(third.c_str()); //std::cout << "PositionX : " << std::setprecision(15) << lens_temp.position.x << std::endl; } else if (!strcmp(second.c_str(), "y_centre") || // Get y center !strcmp(second.c_str(), "y_center") ) { lens_temp.position.y=atof(third.c_str()); } else if ( !strcmp(second.c_str(), "ellipticitymass") || !strcmp(second.c_str(), "ellipticity") || !strcmp(second.c_str(), "ellipticite") ) // Get ellipticity { lens_temp.ellipticity=atof(third.c_str()); //lens_temp.ellipticity=lens_temp.ellipticity/3.; } else if (!strcmp(second.c_str(), "ellipticity_angle") || !strcmp(second.c_str(), "angle_pos")) // Get ellipticity angle { lens_temp.ellipticity_angle=atof(third.c_str()); lens_temp.ellipticity_angle *= DTR; } else if ( !strcmp(second.c_str(), "rcore") || !strcmp(second.c_str(), "core_radius")) // Get core radius { lens_temp.rcore=atof(third.c_str()); } else if (!strcmp(second.c_str(), "rcut") || !strcmp(second.c_str(), "cut_radius")) // Get cut radius { lens_temp.rcut=atof(third.c_str()); } else if ( !strcmp(second.c_str(), "core_radius_kpc")) // Get core radius { core_radius_kpc=atof(third.c_str()); } else if (!strcmp(second.c_str(), "cut_radius_kpc")) // Get cut radius { cut_radius_kpc=atof(third.c_str()); } else if (!strcmp(second.c_str(), "NFW_rs") || // Get scale radius of NFW !strcmp(second.c_str(), "rscale")) { lens_temp.rscale=atof(third.c_str()); } else if (!strcmp(second.c_str(), "exponent") ) // Get exponent { lens_temp.exponent=atof(third.c_str()); } else if (!strcmp(second.c_str(), "alpha") ) // Get alpha { lens_temp.alpha=atof(third.c_str()); } else if (!strcmp(second.c_str(), "einasto_kappacritic") || // Get critical kappa !strcmp(second.c_str(), "kappacritic")) { lens_temp.einasto_kappacritic=atof(third.c_str()); } else if (!strcmp(second.c_str(), "z_lens")) // Get redshift { lens_temp.z=atof(third.c_str()); //std::cerr << lens_temp.z << std::endl; } else if (!strcmp(second.c_str(), "v_disp")) // Get Dispersion velocity { lens_temp.vdisp=atof(third.c_str()); //std::cerr << "vdisp : "<< third << " " << lens_temp.vdisp << std::endl; } else if ( !strncmp(second.c_str(), "virial_mass", 6) || // Get virial mass !strcmp(second.c_str(), "masse") || !strcmp(second.c_str(), "m200") || !strcmp(second.c_str(), "mass") ) { lens_temp.weight=atof(third.c_str()); } } // closes inner while loop // Converting distance in kpc to arcsec. double d1 = d0 / cosmology.h * module_cosmodistances_observerObject(lens_temp.z,cosmology); - printf(" D1 HPC : %f %f %f %f\n",d1, d0,cosmology.h,lens_temp.z ); + //printf(" D1 HPC : %f %f %f %f\n",d1, d0,cosmology.h,lens_temp.z ); // Set rcore value in kpc or in arcsec. if ( core_radius_kpc != 0. ) lens_temp.rcore = core_radius_kpc / d1; else core_radius_kpc = lens_temp.rcore * d1; // Set rcut value in kpc or in arcsec. if ( cut_radius_kpc != 0. ) { //std::cerr << "d1 " << d1 << std::endl; lens_temp.rcut = cut_radius_kpc / d1;} else cut_radius_kpc = lens_temp.rcut * d1; //Calculate parameters like b0, potential ellipticity and anyother parameter depending on the profile module_readParameters_calculatePotentialparameter(&lens_temp); //assign value to SOA //std::cerr << "Type + indice :" << lens_temp.type << Indice_type[lens_temp.type-1] << std::endl; if(Indice_type[lens_temp.type-1] type[ind] = lens_temp.type; lens_SOA->position_x[ind] = lens_temp.position.x; lens_SOA->position_y[ind] = lens_temp.position.y; lens_SOA->b0[ind] = lens_temp.b0; lens_SOA->vdisp[ind] = lens_temp.vdisp; lens_SOA->ellipticity_angle[ind] = lens_temp.ellipticity_angle; lens_SOA->ellipticity[ind] = lens_temp.ellipticity; lens_SOA->ellipticity_potential[ind] = lens_temp.ellipticity_potential; lens_SOA->rcore[ind] = lens_temp.rcore; lens_SOA->rcut[ind] = lens_temp.rcut; lens_SOA->z[ind] = lens_temp.z; lens_SOA->anglecos[ind] = cos(lens_temp.ellipticity_angle); lens_SOA->anglesin[ind] = sin(lens_temp.ellipticity_angle); Indice_type[lens_temp.type-1] += 1; } } // closes if loop } // closes while loop } IN.close(); } /** @brief This module function converts potentials to potentieal_SOA (AOS to SOA conversion) * */ void module_readParameters_PotentialSOA(std::string infile, Potential *lens, Potential_SOA *lens_SOA, int nhalos){ lens_SOA->type = new int[nhalos]; lens_SOA->position_x = new type_t[nhalos]; lens_SOA->position_y = new type_t[nhalos]; lens_SOA->b0 = new type_t[nhalos]; lens_SOA->ellipticity_angle = new type_t[nhalos]; lens_SOA->ellipticity = new type_t[nhalos]; lens_SOA->ellipticity_potential = new type_t[nhalos]; lens_SOA->rcore = new type_t[nhalos]; lens_SOA->rcut = new type_t[nhalos]; lens_SOA->z = new type_t[nhalos]; lens_SOA->anglecos = new type_t[nhalos]; lens_SOA->anglesin = new type_t[nhalos]; int N_type[100]; int Indice_type[100]; int ind; for(int i=0;i < 100; ++i){ N_type[i] = 0; Indice_type[i] = 0; } for (int i = 0; i < nhalos; ++i){ N_type[lens[i].type] += 1; } for(int i=1;i < 100; ++i){ Indice_type[i] = N_type[i]+Indice_type[i-1]; //printf("%d %d \n ",N_type[i], Indice_type[i]); } for (int i = 0; i < nhalos; ++i){ if(Indice_type[lens[i].type-1] type[ind] = lens[i].type; lens_SOA->position_x[ind] = lens[i].position.x; //std::cerr << lens_SOA[1].position_x[*i_point] << " " << lens[i].position.x << std::endl; lens_SOA->position_y[ind] = lens[i].position.y; lens_SOA->b0[ind] = lens[i].b0; lens_SOA->ellipticity_angle[ind] = lens[i].ellipticity_angle; lens_SOA->ellipticity[ind] = lens[i].ellipticity; lens_SOA->ellipticity_potential[ind] = lens[i].ellipticity_potential; lens_SOA->rcore[ind] = lens[i].rcore; lens_SOA->rcut[ind] = lens[i].rcut; lens_SOA->z[ind] = lens[i].z; lens_SOA->anglecos[ind] = cos(lens[i].ellipticity_angle); lens_SOA->anglesin[ind] = sin(lens[i].ellipticity_angle); Indice_type[lens[i].type-1] += 1; } } //printf("Bla anglecos = %f\n", lens_SOA->anglecos[0]); } /** @brief This module function calculates profile depended information like the impactparameter b0 and the potential ellipticity epot * * @param lens: mass distribution for which to calculate parameters */ void module_readParameters_calculatePotentialparameter(Potential *lens){ switch (lens->type) { case(5): /*Elliptical Isothermal Sphere*/ //impact parameter b0 lens->b0 = 4* pi_c2 * lens->vdisp * lens->vdisp ; //ellipticity_potential lens->ellipticity_potential = lens->ellipticity/3 ; break; case(8): /* PIEMD */ //impact parameter b0 lens->b0 = 6.*pi_c2 * lens->vdisp * lens->vdisp; //ellipticity_parameter if ( lens->ellipticity == 0. && lens->ellipticity_potential != 0. ){ // emass is (a2-b2)/(a2+b2) lens->ellipticity = 2.*lens->ellipticity_potential / (1. + lens->ellipticity_potential * lens->ellipticity_potential); //printf("1 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } else if ( lens->ellipticity == 0. && lens->ellipticity_potential == 0. ){ lens->ellipticity_potential = 0.00001; //printf("2 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } else{ // epot is (a-b)/(a+b) lens->ellipticity_potential = (1. - sqrt(1 - lens->ellipticity * lens->ellipticity)) / lens->ellipticity; //printf("3 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } break; case(81): /* PIEMD */ //impact parameter b0 lens->b0 = 6.*pi_c2 * lens->vdisp * lens->vdisp; //ellipticity_parameter if ( lens->ellipticity == 0. && lens->ellipticity_potential != 0. ){ // emass is (a2-b2)/(a2+b2) lens->ellipticity = 2.*lens->ellipticity_potential / (1. + lens->ellipticity_potential * lens->ellipticity_potential); - printf("1 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); + //printf("1 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } else if ( lens->ellipticity == 0. && lens->ellipticity_potential == 0. ){ lens->ellipticity_potential = 0.00001; - printf("2 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); + //printf("2 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } else{ // epot is (a-b)/(a+b) lens->ellipticity_potential = (1. - sqrt(1 - lens->ellipticity * lens->ellipticity)) / lens->ellipticity; - printf("3 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); + //printf("3 : %f %f \n",lens->ellipticity,lens->ellipticity_potential); } break; default: printf( "ERROR: LENSPARA profil type of clump %s unknown : %d\n",lens->name, lens->type); break; }; } void module_readParameters_calculatePotentialparameter_SOA(Potential_SOA *lens, int ind){ switch (lens->type[ind]) { case(5): /*Elliptical Isothermal Sphere*/ //impact parameter b0 lens->b0[ind] = 4* pi_c2 * lens->vdisp[ind] * lens->vdisp[ind] ; //ellipticity_potential lens->ellipticity_potential[ind] = lens->ellipticity[ind]/3 ; break; case(8): /* PIEMD */ //impact parameter b0 lens->b0[ind] = 6.*pi_c2 * lens->vdisp[ind] * lens->vdisp[ind]; //ellipticity_parameter if ( lens->ellipticity[ind] == 0. && lens->ellipticity_potential[ind] != 0. ){ // emass is (a2-b2)/(a2+b2) lens->ellipticity[ind] = 2.*lens->ellipticity_potential[ind] / (1. + lens->ellipticity_potential[ind] * lens->ellipticity_potential[ind]); //printf("1 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]); } else if ( lens->ellipticity[ind] == 0. && lens->ellipticity_potential[ind] == 0. ){ lens->ellipticity_potential[ind] = 0.00001; //printf("2 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]); } else{ // epot is (a-b)/(a+b) lens->ellipticity_potential[ind] = (1. - sqrt(1 - lens->ellipticity[ind] * lens->ellipticity[ind])) / lens->ellipticity[ind]; //printf("3 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]); } break; case(81): /* PIEMD */ //impact parameter b0 lens->b0[ind] = 6.*pi_c2 * lens->vdisp[ind] * lens->vdisp[ind]; //ellipticity_parameter if ( lens->ellipticity[ind] == 0. && lens->ellipticity_potential[ind] != 0. ){ // emass is (a2-b2)/(a2+b2) lens->ellipticity[ind] = 2.*lens->ellipticity_potential[ind] / (1. + lens->ellipticity_potential[ind] * lens->ellipticity_potential[ind]); //printf("1 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]); } else if ( lens->ellipticity[ind] == 0. && lens->ellipticity_potential[ind] == 0. ){ lens->ellipticity_potential[ind] = 0.00001; //printf("2 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]); } else{ // epot is (a-b)/(a+b) lens->ellipticity_potential[ind] = (1. - sqrt(1 - lens->ellipticity[ind] * lens->ellipticity[ind])) / lens->ellipticity[ind]; //printf("3 : %f %f \n",lens->ellipticity[ind],lens->ellipticity_potential[ind]); } break; default: printf( "ERROR: LENSPARA profil type of clump %f unknown : %d N %d \n",lens->name[ind], lens->type[ind], ind); break; }; } /** @brief This module function reads the grid information * * @param infile path to input file * @param grid array where grid information will be stored */ void module_readParameters_Grid(std::string infile, grid_param *grid) { std::string first, second, third, line1, line2; double cast_1; double dmax ; std::ifstream IN(infile.c_str(), std::ios::in); if ( IN ) { while(std::getline(IN,line1)) { std::istringstream read1(line1); // create a stream for the line read1 >> first; if (!strncmp(first.c_str(), "grille", 7) || !strncmp(first.c_str(), "frame", 5) || !strncmp(first.c_str(), "champ", 5)) // Read in potential { while(std::getline(IN,line2)) { std::istringstream read2(line2); read2 >> second >> third; if (strcmp(second.c_str(), "end") == 0) // Move to next potential and initialize it { break; // Break while loop and move to next potential } // Read in values if (!strcmp(second.c_str(), "dmax")) { sscanf(third.c_str(), "%lf", &dmax); //printf( "\t%s\t%lf\n", second.c_str(), dmax); //grid->dmax = (type_t) cast_1; grid->xmin = -dmax; grid->xmax = dmax; grid->ymin = -dmax; grid->ymax = dmax; grid->rmax = dmax * sqrt(2.); } else if (!strcmp(second.c_str(), "xmin")) { sscanf(third.c_str(), "%lf", &cast_1); grid->xmin = (type_t) cast_1; //printf("\t%s\t%lf\n", second.c_str(), grid->xmin); } else if (!strcmp(second.c_str(), "xmax")) { sscanf(third.c_str(), "%lf", &cast_1); grid->xmax = (type_t) cast_1; //printf("\t%s\t%lf\n", second.c_str(), grid->xmax); } else if (!strcmp(second.c_str(), "ymin")) { sscanf(third.c_str(), "%lf", &cast_1); grid->ymin = (type_t) cast_1; //printf( "\t%s\t%lf\n", second.c_str(), grid->ymin); } else if (!strcmp(second.c_str(), "ymax")) { sscanf(third.c_str(), "%lf", &cast_1); grid->ymax = (type_t) cast_1; //printf( "\t%s\t%lf\n", second.c_str(), grid->ymax); } } } // closes if loop } // closes while loop } // closes if(IN) IN.close(); } /* @brief Get number of sets for cleanlens mode * !!Not used. Will be reworked!! * * This module function reads in the "clean lens" mode sources: These sources are read in and lensed only once assuming a fixed mass model // Then we can see the predicted location of the images and compare with their real positions * * @param clean lens file, number of clean lens images * @return coordinates for each image, shape of each image, redshifts, number of sets, number of images per set */ // Get number of sets for cleanlens mode void module_readParameters_SingleLensingSourcesNumberSets(std::string infile, int &nsetofimages_cleanlens ) { std::string line1; int setNumber=0; nsetofimages_cleanlens=0; // Get number of sets of images std::ifstream IM(infile.c_str(),std::ios::in); if ( IM ) { while( std::getline(IM,line1)) { // Read in parameters sscanf(line1.c_str(), "%d", &setNumber); if(setNumber > nsetofimages_cleanlens) nsetofimages_cleanlens = setNumber; } } IM.close(); } /** @brief Prints out cosmology */ void module_readParameters_debug_cosmology(int DEBUG, cosmo_param cosmology) { if (DEBUG == 1) // If we are in debug mode { printf("\n\n####################### Summary of Input Parameters #######################\n"); printf("Cosmology: Cosmology model = %d, omegaM = %lf, omegaX = %lf, curvature = %lf, wX = %lf, wa = %lf, H0 = %lf, h = %lf\n", cosmology.model, cosmology.omegaM, cosmology.omegaX, cosmology.curvature, cosmology.wX, cosmology.wa, cosmology.H0, cosmology.h); } } /** @brief Prints out runmode */ void module_readParameters_debug_runmode(int DEBUG, runmode_param runmode) { if (DEBUG == 1) // If we are in debug mode { printf("Runmode: nhalos = %d, nsets = %d, nimagestot = %d, source = %d, image = %d, arclet = %d, cline = %d, inverse= %d, multi= %d DEBUG = %d\n", runmode.nhalos, runmode.nsets, runmode.nimagestot, runmode.source, runmode.image, runmode.arclet, runmode.cline, runmode.inverse, runmode.multi, runmode.debug); } } /** @brief Prints out images */ void module_readParameters_debug_image(int DEBUG, galaxy image[], int nImagesSet[], int nsets) { if (DEBUG == 1) // If we are in debug mode { int index = 0; for ( int i = 0; i < nsets; ++i){ for( int j = 0; j < nImagesSet[i]; ++j){ printf("Image [%d]: x = %lf, y = %lf, shape: a = %f, b = %f, theta = %lf, redshift = %lf, nImagesSet = %d,\n",index , image[index].center.x, image[index].center.y, image[index].shape.a, image[index].shape.b, image[index].shape.theta, image[index].redshift, nImagesSet[i]); index +=1; //printf( "%d \n", index ); } } } } /** @brief Prints out source */ void module_readParameters_debug_source(int DEBUG, galaxy source[], int nsets) { if (DEBUG == 1) // If we are in debug mode { for ( int i = 0; i < nsets; ++i){ printf("Source[%d]: x = %lf, y = %lf, shape: a = %f, b = %f, theta = %lf, redshift = %lf. \n\n", i, source[i].center.x, source[i].center.y, source[i].shape.a, source[i].shape.b, source[i].shape.theta, source[i].redshift); } } } /** @brief Prints out massdistributions */ void module_readParameters_debug_potential(int DEBUG, Potential lenses[], int nhalos) { if (DEBUG == 1) // If we are in debug mode { for ( int i = 0; i < nhalos; ++i){ printf("Potential[%d]: x = %f, y = %f, vdisp = %f, type = %d, type_name = %s, name = %s,\n \t ellipticity = %f, ellipticity_pot = %f, ellipticity angle (radians) = %f, rcore = %f, rcut = %f,\n \t rscale = %f, exponent = %f, alpha = %f, einasto kappa critical = %f, z = %f\n", i,lenses[i].position.x, lenses[i].position.y, lenses[i].vdisp, lenses[i].type, lenses[i].type_name, lenses[i].name, lenses[i].ellipticity, lenses[i].ellipticity_potential, lenses[i].ellipticity_angle, lenses[i].rcore, lenses[i].rcut, lenses[i].rscale, lenses[i].exponent, lenses[i].alpha, lenses[i].einasto_kappacritic, lenses[i].z); } } } void module_readParameters_debug_potential_SOA(int DEBUG, Potential_SOA lenses, int nhalos) { if (DEBUG == 1) // If we are in debug mode { for ( int i = 0; i < nhalos; ++i){ printf("Potential[%d]: x = %f, y = %f, b0 = %f, vdisp = %f, type = %d, \n \t ellipticity = %f, ellipticity_pot = %f, ellipticity angle (radians) = %f, rcore = %f, rcut = %f,\n \t z = %f\n, angle cos %f, sin %f \n", i,lenses.position_x[i], lenses.position_y[i], lenses.b0[i],lenses.vdisp[i], lenses.type[i], lenses.ellipticity[i], lenses.ellipticity_potential[i], lenses.ellipticity_angle[i], lenses.rcore[i], lenses.rcut[i], lenses.z[i], lenses.anglecos[i], lenses.anglesin[i]); } } } /** @brief Prints out potfile_param */ void module_readParameters_debug_potfileparam(int DEBUG, potfile_param *potfile) { if (DEBUG == 1) // If we are in debug mode { printf("Potfile: potid = %d, filename = %s, ftype = %d, type = %d, zlens = %f, mag0 = %f, sigma = %f,\n \t core = %f, corekpc = %f, ircut = %d, cut1 = %f, cut2 = %f,\n \t cutkpc1 = %f, cutkpc2 = %f, islope = %d, slope1 = %f, slope2 = %f\n", potfile->potid,potfile->potfile,potfile->ftype,potfile->type,potfile->zlens,potfile->mag0,potfile->sigma,potfile->core,potfile->corekpc,potfile->ircut,potfile->cut1,potfile->cut2,potfile->cutkpc1,potfile->cutkpc2,potfile->islope,potfile->slope1,potfile->slope2); } } /** @brief Prints out cline */ void module_readParameters_debug_criticcaustic(int DEBUG, cline_param cline) { if (DEBUG == 1) // If we are in debug mode { printf("Cline: Number of planes = %d ", cline.nplan); for( int i=0; i < cline.nplan; ++i){ printf(" z[%d] = %f, ",i ,cline.cz[i]); } printf(" dmax = %f, Low = %f, High= %f, Nb of gridcells %d \n", cline.dmax , cline.limitLow ,cline.limitHigh, cline.nbgridcells); } } /** @brief Prints out limits */ void module_readParameters_debug_limit(int DEBUG, struct potentialoptimization host_potentialoptimization) { if (DEBUG == 1) // If we are in debug mode { printf("DEBUG: Center.x B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.position.x.block, host_potentialoptimization.position.x.min, host_potentialoptimization.position.x.max, host_potentialoptimization.position.x.sigma); printf("DEBUG: Center.y B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.position.y.block, host_potentialoptimization.position.y.min, host_potentialoptimization.position.y.max, host_potentialoptimization.position.y.sigma); printf("DEBUG: weight.y B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.weight.block, host_potentialoptimization.weight.min, host_potentialoptimization.weight.max, host_potentialoptimization.weight.sigma); printf("DEBUG: ellipticity_angle B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.ellipticity_angle.block, host_potentialoptimization.ellipticity_angle.min, host_potentialoptimization.ellipticity_angle.max, host_potentialoptimization.ellipticity_angle.sigma); printf("DEBUG: ellipticity B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.ellipticity.block, host_potentialoptimization.ellipticity.min, host_potentialoptimization.ellipticity.max, host_potentialoptimization.ellipticity.sigma); printf("DEBUG: ellipticity_potential B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.ellipticity_potential.block, host_potentialoptimization.ellipticity_potential.min, host_potentialoptimization.ellipticity_potential.max, host_potentialoptimization.ellipticity_potential.sigma); printf("DEBUG: rcore B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.rcore.block, host_potentialoptimization.rcore.min, host_potentialoptimization.rcore.max, host_potentialoptimization.rcore.sigma); printf("DEBUG: rcut B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.rcut.block, host_potentialoptimization.rcut.min, host_potentialoptimization.rcut.max, host_potentialoptimization.rcut.sigma); printf("DEBUG: rscale B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.rscale.block, host_potentialoptimization.rscale.min, host_potentialoptimization.rscale.max, host_potentialoptimization.rscale.sigma); printf("DEBUG: exponent B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.exponent.block, host_potentialoptimization.exponent.min, host_potentialoptimization.exponent.max, host_potentialoptimization.exponent.sigma); printf("DEBUG: vdisp B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.vdisp.block, host_potentialoptimization.vdisp.min, host_potentialoptimization.vdisp.max, host_potentialoptimization.vdisp.sigma); printf("DEBUG: alpha B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.alpha.block, host_potentialoptimization.alpha.min, host_potentialoptimization.alpha.max, host_potentialoptimization.alpha.sigma); printf("DEBUG: z B = %d, min = %f, max = %f, sigma = %f \n", host_potentialoptimization.z.block, host_potentialoptimization.z.min, host_potentialoptimization.z.max, host_potentialoptimization.z.sigma); } }