diff --git a/src/gradient.cpp b/src/gradient.cpp index e8e4d3f..305bb9a 100644 --- a/src/gradient.cpp +++ b/src/gradient.cpp @@ -1,972 +1,973 @@ /** * @Author Christoph Schaefer, EPFL (christophernstrerne.schaefer@epfl.ch), Gilles Fourestey (gilles.fourestey@epfl.ch) * @date July 2017 * @version 0,1 * */ #include #include #include //#include #include #include #include #include #include /* #ifdef __AVX__ #include "simd_math_avx.h" #endif #ifdef __AVX512F__ #include "simd_math_avx512f.h" #endif */ #include "structure_hpc.hpp" #include "gradient.hpp" #include "utils.hpp" //#include "iacaMarks.h" // // // /**@brief Return the gradient of the projected lens potential for one clump * !!! You have to multiply by dlsds to obtain the true gradient * for the expressions, see the papers : JP Kneib & P Natarajan, Cluster Lenses, The Astronomy and Astrophysics Review (2011) for 1 and 2 * and JP Kneib PhD (1993) for 3 * * @param pImage point where the result is computed in the lens plane * @param lens mass distribution */ // /// Useful functions // complex piemd_1derivatives_ci05(type_t x, type_t y, type_t eps, type_t rc) { type_t sqe, cx1, cxro, cyro, rem2; complex zci, znum, zden, zis, zres; type_t norm; // //std::cout << "piemd_lderivatives" << std::endl; sqe = sqrt(eps); cx1 = ((type_t) 1. - eps) / ((type_t) 1. + eps); cxro = ((type_t) 1. + eps) * ((type_t) 1. + eps); cyro = ((type_t) 1. - eps) * ((type_t) 1. - eps); // rem2 = x * x / cxro + y * y / cyro; /*zci=cpx(0.,-0.5*(1.-eps*eps)/sqe); znum=cpx(cx1*x,(2.*sqe*sqrt(rc*rc+rem2)-y/cx1)); zden=cpx(x,(2.*rc*sqe-y)); zis=pcpx(zci,lncpx(dcpx(znum,zden))); zres=pcpxflt(zis,b0);*/ // --> optimized code zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; znum.re = cx1 * x; znum.im = (type_t) 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; zden.re = x; zden.im = (type_t) 2.*rc * sqe - y; norm = zden.re * zden.re + zden.im * zden.im; // zis = znum/zden zis.re = (znum.re * zden.re + znum.im * zden.im) / norm; zis.im = (znum.im * zden.re - znum.re * zden.im) / norm; norm = zis.re; zis.re = log(sqrt(norm * norm + zis.im * zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) zis.im = atan2(zis.im, norm); // norm = zis.re; zres.re = zci.re * zis.re - zci.im * zis.im; // Re( zci*ln(zis) ) zres.im = zci.im * zis.re + zis.im * zci.re; // Im( zci*ln(zis) ) // //zres.re = zis.re*b0; //zres.im = zis.im*b0; // return(zres); } // //// changes the coordinates of point P into a new basis (rotation of angle theta) //// y' y x' //// * | / //// * | / theta //// * | / //// *|--------->x /* inline struct point rotateCoordinateSystem(struct point P, double theta) { struct point Q; Q.x = P.x*cos(theta) + P.y*sin(theta); Q.y = P.y*cos(theta) - P.x*sin(theta); return(Q); } */ // // struct point grad_halo(const struct point *pImage, const struct Potential *lens) { struct point true_coord, true_coord_rot, result; type_t X, Y, R, angular_deviation, u; complex zis; // result.x = result.y = (type_t) 0.; // /*positionning at the potential center*/ // Change the origin of the coordinate system to the center of the clump true_coord.x = pImage->x - lens->position.x; true_coord.y = pImage->y - lens->position.y; //printf("x, y = %f, %f\n", lens->position.x, lens->position.y); // true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle); // switch (lens->type) { case(5): /*Elliptical Isothermal Sphere*/ /*rotation of the coordiante axes to match the potential axes*/ //true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle); // R=sqrt(true_coord_rot.x*true_coord_rot.x*((type_t) 1. - lens->ellipticity_potential) + true_coord_rot.y*true_coord_rot.y*((type_t) 1. + lens->ellipticity_potential)); //ellippot = ellipmass/3 result.x = ((type_t) 1. - lens->ellipticity_potential)*lens->b0*true_coord_rot.x/(R); result.y = ((type_t) 1. + lens->ellipticity_potential)*lens->b0*true_coord_rot.y/(R); break; case(8): /* PIEMD */ /*rotation of the coordiante axes to match the potential axes*/ /*Doing something....*/ complex zis = piemd_1derivatives_ci05(true_coord_rot.x, true_coord_rot.y, lens->ellipticity_potential, lens->rcore); // result.x = lens->b0*zis.re; result.y = lens->b0*zis.im; break; case(81): //PIEMD Kassiola & Kovner,1993 I0.5c-I0.5cut type_t t05; if ( lens->ellipticity_potential > (type_t) 2E-4 ) { //printf("1 "); t05 = lens->b0*lens->rcut/(lens->rcut - lens->rcore); complex zis = piemd_1derivatives_ci05(true_coord_rot.x, true_coord_rot.y, lens->ellipticity_potential, lens->rcore); complex zis_cut = piemd_1derivatives_ci05(true_coord_rot.x, true_coord_rot.y, lens->ellipticity_potential, lens->rcut); result.x = t05 * (zis.re - zis_cut.re); result.y = t05 * (zis.im - zis_cut.im); //printf(" g = %f %f\n", result.x, result.y); } else if (( u = true_coord_rot.x*true_coord_rot.x + true_coord_rot.y*true_coord_rot.y) > (type_t) 0. ) { //printf("2 "); // Circular dPIE Elliasdottir 2007 Eq A23 slighly modified for t05 X = lens->rcore; Y = lens->rcut; t05 = sqrt(u + X * X) - X - sqrt(u + Y * Y) + Y; // Faster and equiv to Elliasdottir (see Golse PhD) t05 *= lens->b0 * Y / (Y - X) / u; // 1/u because t05/sqrt(u) and normalised Q/sqrt(u) result.x = t05*true_coord_rot.x; result.y = t05*true_coord_rot.y; } else { //printf("3 "); result.x = (type_t) 0.; result.y = (type_t) 0.; } break; default: std::cout << "ERROR: Grad 1 profil type of clump "<< lens->name << " unknown : "<< lens->type << std::endl; break; }; result = rotateCoordinateSystem(result, -lens->ellipticity_angle); //printf(" rot grad = %.15f %.15f\n", result.x, result.y); return result; } // // // struct point module_potentialDerivatives_totalGradient(const int nhalos, const struct point* pImage, const struct Potential* lens) { struct point grad, clumpgrad; // grad.x = (type_t) 0.; grad.y = (type_t) 0.; // for(int i = 0; i < nhalos; i++) { clumpgrad = grad_halo(pImage, &lens[i]); //compute gradient for each clump separately //std::cout << clumpgrad.x << " " << clumpgrad.y << std::endl; //nan check //if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y) { // add the gradients grad.x += clumpgrad.x; grad.y += clumpgrad.y; } } // return(grad); } // // SOA versions, vectorizable // struct point module_potentialDerivatives_totalGradient_5_SOA(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA begins"); //printf("# module_potentialDerivatives_totalGradient_SIS_SOA begins\n"); // struct point grad, result; grad.x = (type_t) 0.; grad.y = (type_t) 0.; for(int i = shalos; i < shalos + nhalos; i++) { // struct point true_coord, true_coord_rotation; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); type_t ell_pot = lens->ellipticity_potential[i]; // type_t R = sqrt(true_coord_rotation.x*true_coord_rotation.x*((type_t) 1. - ell_pot) + true_coord_rotation.y*true_coord_rotation.y*((type_t) 1. + ell_pot)); // result.x = ((type_t) 1. - ell_pot)*lens->b0[i]*true_coord_rotation.x/R; result.y = ((type_t) 1. + ell_pot)*lens->b0[i]*true_coord_rotation.y/R; // result = rotateCoordinateSystem(result, -lens->ellipticity_angle[i]); // grad.x += result.x; grad.y += result.y; } return grad; } // // // struct point module_potentialDerivatives_totalGradient_5_SOA_v2(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins"); //printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n"); // struct point grad, result; grad.x = (type_t) 0.; grad.y = (type_t) 0.; for(int i = shalos; i < shalos + nhalos; i++) { struct point true_coord, true_coord_rotation; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // //true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); type_t b0 = lens->b0[i]; // type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; // type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t ell_pot = lens->ellipticity_potential[i]; // type_t val = x*x*((type_t) 1. - ell_pot) + y*y*((type_t) 1. + ell_pot); type_t R = 1./sqrtf(val); R = R*(1.5 - 0.5*val*R*R); result.x = ((type_t) 1. - ell_pot)*b0*x*R; result.y = ((type_t) 1. + ell_pot)*b0*y*R; // grad.x += result.x*cose - result.y*sine; grad.y += result.y*cose + result.x*sine; //grad.x = x/R; //grad.y = y/R; } return grad; } - +#if 0 struct point module_potentialDerivatives_totalGradient_5_SOA_print(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos, int index) { asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins"); //printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n"); // struct point grad, result; std::ofstream myfile; grad.x = (type_t) 0.; grad.y = (type_t) 0.; #ifdef _double std::string name = "Double_"; #else std::string name = "Float_"; #endif for(int i = shalos; i < shalos + nhalos; i++) { // struct point true_coord, true_coord_rotation; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; //// /* myfile.open (name + "pImage->x_1.txt", std::ios_base::app); myfile << index << " " << pImage->x << std::setprecision(7) << " " << std::endl; myfile.close(); // myfile.open (name + "pImage->y_1.txt", std::ios_base::app); myfile << index << " " << pImage->y << std::setprecision(7) << " " << std::endl; myfile.close(); // myfile.open (name + "true_coord.x_2.txt", std::ios_base::app); myfile << index << " " << true_coord.x << std::setprecision(7) << " " << std::endl; myfile.close(); // myfile.open (name + "true_coord.y_2.txt", std::ios_base::app); myfile << index << " " << true_coord.y << std::setprecision(7) << " " << std::endl; myfile.close(); */// type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; //// myfile.open (name + "lens->anglecos[i]_3.txt", std::ios_base::app); myfile << index << " " << lens->anglecos[i] << std::setprecision(7) << " " << std::endl; myfile.close(); // myfile.open (name + "lens->anglesin[i]_3.txt", std::ios_base::app); myfile << index << " " << lens->anglesin[i] << std::setprecision(7) << " " << std::endl; myfile.close(); //// type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; //// /* myfile.open (name + "x_4.txt", std::ios_base::app); myfile << index << " " << x << std::setprecision(7) << " " << std::endl; myfile.close(); / myfile.open (name + "y_4.txt", std::ios_base::app); myfile << index << " " << y << std::setprecision(7) << " " << std::endl; myfile.close(); *//// type_t ell_pot = lens->ellipticity_potential[i]; // type_t R = sqrt(x*x*(1 - ell_pot) + y*y*(1 + ell_pot)); // result.x = (1 - ell_pot)*lens->b0[i]*x/R; result.y = (1 + ell_pot)*lens->b0[i]*y/R; //// /* myfile.open (name + "ell_pot_5.txt", std::ios_base::app); myfile << index << " " << ell_pot << std::setprecision(7) << " " << std::endl; myfile.close(); // myfile.open (name + "R_5.txt", std::ios_base::app); myfile << index << " " << R << std::setprecision(7) << " " << std::endl; myfile.close(); myfile.open (name + "x_6.txt", std::ios_base::app); myfile << index << " " << x << std::setprecision(7) << " " << std::endl; myfile.close(); myfile.open (name + "y_6.txt", std::ios_base::app); myfile << index << " " << y << std::setprecision(7) << " " << std::endl; myfile.close(); */// grad.x += result.x*cose - result.y*sine; grad.y += result.y*cose + result.x*sine; //// /* myfile.open (name + "grad.x_7.txt", std::ios_base::app); myfile << index << " " << grad.x << std::setprecision(7) << " " << std::endl; myfile.close(); myfile.open (name + "grad.y_7.txt", std::ios_base::app); myfile << index << " " << grad.y << std::setprecision(7) << " " << std::endl; myfile.close(); *//// } return grad; } +#endif // // // struct point module_potentialDerivatives_totalGradient_5_SOA_v2_novec(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA begins"); // struct point grad, result; grad.x = (type_t) 0.; grad.y = (type_t) 0.; #pragma novector for(int i = shalos; i < shalos + nhalos; i++) { // struct point true_coord, true_coord_rotation; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // //true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; // type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; //: type_t R = sqrt(x*x*((type_t) 1. - lens->ellipticity[i]/(type_t) 3.) + y*y*((type_t) 1. + lens->ellipticity[i]/(type_t) 3.)); result.x = ((type_t) 1. - lens->ellipticity[i]/(type_t) 3.)*lens->b0[i]*x/R; result.y = ((type_t) 1. + lens->ellipticity[i]/(type_t) 3.)*lens->b0[i]*y/R; // grad.x += result.x*cose - result.y*sine; grad.y += result.y*cose + result.x*sine; } return grad; } // // // struct point module_potentialDerivatives_totalGradient_8_SOA(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_SOA begins"); // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // struct point grad, clumpgrad; grad.x = (type_t) 0.; grad.y = (type_t) 0.; // for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; //type_t R, angular_deviation; complex zis; // //result.x = result.y = 0.; // //@@printf("image_x = %f image_y = %f\n", pImage->x, pImage->y); true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; //printf("x = %f y = %f\n", true_coord.x, true_coord.y); /*positionning at the potential center*/ // Change the origin of the coordinate system to the center of the clump true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); // type_t x = true_coord_rot.x; type_t y = true_coord_rot.y; //@@printf("x = %f y = %f\n", x, y); type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; // //std::cout << "piemd_lderivatives" << std::endl; // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; //@@printf("zci = %f %f\n", zci.re, zci.im); // znum.re = cx1*x; znum.im = (type_t) 2.*sqe*sqrt(rc*rc + rem2) - y/cx1; // zden.re = x; zden.im = (type_t) 2.*rc*sqe - y; norm = (zden.re*zden.re + zden.im*zden.im); // zis = znum/zden //@@printf("norm = %f\n", norm); // zis.re = (znum.re*zden.re + znum.im*zden.im)/norm; zis.im = (znum.im*zden.re - znum.re*zden.im)/norm; //@@printf("zis = %f %f\n", zis.re, zis.im); norm = zis.re; zis.re = log(sqrt(norm*norm + zis.im*zis.im)); // ln(zis) = ln(|zis|)+i.Arg(zis) zis.im = atan2(zis.im, norm); //@@printf("y,x = %f %f\n", zis.im, norm); // norm = zis.re; zres.re = zci.re*zis.re - zci.im*zis.im; // Re( zci*ln(zis) ) zres.im = zci.im*zis.re + zis.im*zci.re; // Im( zci*ln(zis) ) // //@@printf("zres: %f %f\n", zres.re, zres.im); // zis.re = zres.re; zis.im = zres.im; // //zres.re = zis.re*b0; //zres.im = zis.im*b0; // rotation clumpgrad.x = zis.re; clumpgrad.y = zis.im; clumpgrad = rotateCoordinateSystem(clumpgrad, -lens->ellipticity_angle[i]); // clumpgrad.x = lens->b0[i]*clumpgrad.x; clumpgrad.y = lens->b0[i]*clumpgrad.y; // //clumpgrad.x = lens->b0[i]*zis.re; //clumpgrad.y = lens->b0[i]*zis.im; //nan check //if(clumpgrad.x == clumpgrad.x or clumpgrad.y == clumpgrad.y) //{ // add the gradients grad.x += clumpgrad.x; grad.y += clumpgrad.y; //@@printf("grad: %f %f\n", grad.x, grad.y); //@@std::cout << "grad.x = " << grad.x << " grad.y = " << grad.y << std::endl; //} } //IACA_END; // return(grad); } // // // struct point module_potentialDerivatives_totalGradient_8_SOA_v2(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_8_SOA_v2 begins"); //std::cout << "# module_potentialDerivatives_totalGradient_8_SOA_v2 begins" << std::endl; // // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // struct point grad, clumpgrad; grad.x = 0; grad.y = 0; //printf("%d %d\n", shalos, nhalos); // for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; complex zis; type_t b0 = lens->b0[i]; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // //true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); // type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; // type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; // KERNEL(rc, zres) // grad.x += b0*(zres.re*cose - zres.im*sine); grad.y += b0*(zres.im*cose + zres.re*sine); // } //IACA_END; // return(grad); } // // // struct point module_potentialDerivatives_totalGradient_8_SOA_v2_novec(const struct point *pImage, const struct Potential_SOA *lens , int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_8_SOA_v2 begins"); //std::cout << "# module_potentialDerivatives_totalGradient_8_SOA_v2 begins" << std::endl; // // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // struct point grad, clumpgrad; grad.x = (type_t) 0.; grad.y = (type_t) 0.; //printf("%d %d\n", shalos, nhalos); // #pragma novector for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; complex zis; type_t b0 = lens->b0[i]; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; // //true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); // type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; // type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; // KERNEL(rc, zres) // grad.x += b0*(zres.re*cose - zres.im*sine); grad.y += b0*(zres.im*cose + zres.re*sine); // } //IACA_END; // return(grad); } // // // struct point module_potentialDerivatives_totalGradient_81_SOA(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins"); //std::cout << "# module_potentialDerivatives_totalGradient_SOA begins" << std::endl; // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // struct point grad, clumpgrad; grad.x = (type_t) 0.; grad.y = (type_t) 0.; for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; //type_t R, angular_deviation; complex zis; // //result.x = result.y = 0.; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; /*positionning at the potential center*/ // Change the origin of the coordinate system to the center of the clump true_coord_rot = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]); // type_t x = true_coord_rot.x; type_t y = true_coord_rot.y; type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; type_t rcut = lens->rcut[i]; type_t b0 = lens->b0[i]; type_t t05 = b0*rcut/(rcut - rc); // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres_rc, zres_rcut; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; // step 1 { KERNEL(rc, zres_rc) } // step 2 { KERNEL(rcut, zres_rcut) } zis.re = t05*(zres_rc.re - zres_rcut.re); zis.im = t05*(zres_rc.im - zres_rcut.im); // rotation clumpgrad.x = zis.re; clumpgrad.y = zis.im; clumpgrad = rotateCoordinateSystem(clumpgrad, -lens->ellipticity_angle[i]); // grad.x += clumpgrad.x; grad.y += clumpgrad.y; //} } //IACA_END; // return(grad); } // // // struct point module_potentialDerivatives_totalGradient_81_SOA_v2(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins"); //std::cout << "# module_potentialDerivatives_totalGradient_81_SOA begins" << std::endl; // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 // struct point grad, clumpgrad; grad.x = (type_t) 0.; grad.y = (type_t) 0.; for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; //type_t R, angular_deviation; complex zis; // //result.x = result.y = 0.; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; /*positionning at the potential center*/ // Change the origin of the coordinate system to the center of the clump type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; type_t rcut = lens->rcut[i]; type_t b0 = lens->b0[i]; type_t t05 = b0*rcut/(rcut - rc); // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres_rc, zres_rcut; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; // // step 1 { KERNEL(rc, zres_rc) } // step 2 { KERNEL(rcut, zres_rcut) } zis.re = t05*(zres_rc.re - zres_rcut.re); zis.im = t05*(zres_rc.im - zres_rcut.im); // rotation grad.x += (zis.re*cose - zis.im*sine); grad.y += (zis.im*cose + zis.re*sine); // } // return(grad); } // // // struct point module_potentialDerivatives_totalGradient_81_SOA_v2_novec(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos) { asm volatile("# module_potentialDerivatives_totalGradient_81_SOA begins"); //std::cout << "# module_potentialDerivatives_totalGradient_81_SOA begins" << std::endl; // 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0 struct point grad, clumpgrad; grad.x = 0.; grad.y = 0.; #pragma novector for(int i = shalos; i < shalos + nhalos; i++) { //IACA_START; // struct point true_coord, true_coord_rot; //, result; //type_t R, angular_deviation; complex zis; // //result.x = result.y = 0.; // true_coord.x = pImage->x - lens->position_x[i]; true_coord.y = pImage->y - lens->position_y[i]; /*positionning at the potential center*/ // Change the origin of the coordinate system to the center of the clump type_t cose = lens->anglecos[i]; type_t sine = lens->anglesin[i]; type_t x = true_coord.x*cose + true_coord.y*sine; type_t y = true_coord.y*cose - true_coord.x*sine; // type_t eps = lens->ellipticity_potential[i]; type_t rc = lens->rcore[i]; type_t rcut = lens->rcut[i]; type_t b0 = lens->b0[i]; type_t t05 = b0*rcut/(rcut - rc); // type_t sqe = sqrt(eps); // type_t cx1 = ((type_t) 1. - eps)/((type_t) 1. + eps); type_t cxro = ((type_t) 1. + eps)*((type_t) 1. + eps); type_t cyro = ((type_t) 1. - eps)*((type_t) 1. - eps); // type_t rem2 = x*x/cxro + y*y/cyro; // complex zci, znum, zden, zres_rc, zres_rcut; type_t norm; // zci.re = (type_t) 0.; zci.im = (type_t) -0.5*((type_t) 1. - eps*eps)/sqe; // // step 1 { KERNEL(rc, zres_rc) } // step 2 { KERNEL(rcut, zres_rcut) } zis.re = t05*(zres_rc.re - zres_rcut.re); zis.im = t05*(zres_rc.im - zres_rcut.im); // rotation grad.x += (zis.re*cose - zis.im*sine); grad.y += (zis.im*cose + zis.re*sine); // } // return(grad); } // // // typedef struct point (*halo_func_t) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos); halo_func_t halo_func[100] = { 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_5_SOA_v2, 0, 0, module_potentialDerivatives_totalGradient_8_SOA_v2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_81_SOA_v2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; // // // struct point module_potentialDerivatives_totalGradient_SOA(const struct point *pImage, const struct Potential_SOA *lens, int nhalos) { struct point grad, clumpgrad; // grad.x = clumpgrad.x = (type_t) 0.; grad.y = clumpgrad.y = (type_t) 0.; // int shalos = 0; while (shalos < nhalos) { int lens_type = lens->type[shalos]; int count = 1; while (lens->type[shalos + count] == lens_type and shalos + count < nhalos){ //int i = count; //std::cerr << lens->position_x[i] << lens->position_y[i] << lens->anglecos[i]<< lens->anglesin[i]<< lens->ellipticity_potential[i] << lens->rcore[i] << lens->rcut[i] << lens->b0[i] << std::endl; //std::cerr << "lens->type[shalos + count] = " << lens->type[shalos + count] << " " << lens_type << " " << lens_type << " " << " count = " << count << std::endl; count++; } // clumpgrad = (*halo_func[lens_type])(pImage, lens, shalos, count); // //std::cerr << lens->position_x[i] << lens->position_y[i] << lens->anglecos[i]<< lens->anglesin[i]<< lens->ellipticity_potential[i] << lens->rcore[i] << lens->rcut[i] << lens->b0[i] << std::endl; //std::cerr << "type = " << lens_type << " " << count << " " << nhalos << " " << " grad.x = " << clumpgrad.x << " grad.y = " << clumpgrad.y << std::endl; grad.x += clumpgrad.x; grad.y += clumpgrad.y; shalos += count; } return(grad); } // typedef struct point (*halo_func_t_novec) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos); // halo_func_t_novec halo_func_novec[100] = { 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_5_SOA_v2_novec, 0, 0, module_potentialDerivatives_totalGradient_8_SOA_v2_novec, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, module_potentialDerivatives_totalGradient_81_SOA_v2_novec, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; // // // struct point module_potentialDerivatives_totalGradient_SOA_novec(const struct point *pImage, const struct Potential_SOA *lens, int nhalos) { struct point grad, clumpgrad; // grad.x = clumpgrad.x = 0; grad.y = clumpgrad.y = 0; // int shalos = 0; // //module_potentialDerivatives_totalGradient_81_SOA(pImage, lens, 0, nhalos); //return; /* int* p_type = &(lens->type)[0]; int* lens_type = (int*) malloc(nhalos*sizeof(int)); memcpy(lens_type, &(lens->type)[0], nhalos*sizeof(int)); */ //quicksort(lens_type, nhalos); // while (shalos < nhalos) { int lens_type = lens->type[shalos]; int count = 1; while (lens->type[shalos + count] == lens_type) count++; //std::cerr << "type = " << lens_type << " " << count << " " << shalos << std::endl; // clumpgrad = (*halo_func_novec[lens_type])(pImage, lens, shalos, count); // grad.x += clumpgrad.x; grad.y += clumpgrad.y; shalos += count; } return(grad); }