Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F95135614
gradient2.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Dec 13, 03:38
Size
7 KB
Mime Type
text/x-c
Expires
Sun, Dec 15, 03:38 (2 d)
Engine
blob
Format
Raw Data
Handle
22903034
Attached To
R1448 Lenstool-HPC
gradient2.cpp
View Options
/**
* @Author Christoph Schaefer, EPFL (christophernstrerne.schaefer@epfl.ch), Gilles Fourestey (gilles.fourestey@epfl.ch)
* @date July 2017
* @version 0,1
*
*/
#include <iostream>
#include <iomanip>
#include <string.h>
//#include <cuda_runtime.h>
#include <math.h>
#include <sys/time.h>
#include <fstream>
#include <immintrin.h>
#include <map>
/*
#ifdef __AVX__
#include "simd_math_avx.h"
#endif
#ifdef __AVX512F__
#include "simd_math_avx512f.h"
#endif
*/
#include "structure_hpc.hpp"
#include "gradient2.hpp"
//lenstool fct
/*
* derivates of I0.5 KK
* Parameters :
* - (x,y) is the computation position of the potential
* - eps is the ellepticity (a-b)/(a+b)
* - rc is the core radius
* - b0 asymptotic Einstein radius E0. (6pi*vdisp^2/c^2)
*
* Return a the 4 second derivatives of the PIEMD potential
*/
void mdci05_hpc(type_t x, type_t y, type_t eps, type_t rc, type_t b0, struct matrix *res)
{
type_t ci, sqe, cx1, cxro, cyro, wrem;
type_t didyre, didyim, didxre;// didxim;
type_t cx1inv, den1, num2, den2;
sqe = sqrt(eps);
cx1 = (1. - eps) / (1. + eps);
cx1inv = 1. / cx1;
cxro = (1. + eps) * (1. + eps); /* rem^2=x^2/(1+e^2) + y^2/(1-e^2) Eq 2.3.6*/
cyro = (1. - eps) * (1. - eps);
ci = 0.5 * (1. - eps * eps) / sqe;
wrem = sqrt(rc * rc + x * x / cxro + y * y / cyro); /*wrem^2=w^2+rem^2 with w core radius*/
den1 = 2.*sqe * wrem - y * cx1inv;
den1 = cx1 * cx1 * x * x + den1 * den1;
num2 = 2.*rc * sqe - y;
den2 = x * x + num2 * num2;
didxre = ci * ( cx1 * (2.*sqe * x * x / cxro / wrem - 2.*sqe * wrem + y * cx1inv) / den1 + num2 / den2 );
didyre = ci * ( (2 * sqe * x * y * cx1 / cyro / wrem - x) / den1 + x / den2 );
didyim = ci * ( (2 * sqe * wrem * cx1inv - y * cx1inv * cx1inv - 4 * eps * y / cyro +
2 * sqe * y * y / cyro / wrem * cx1inv) / den1 - num2 / den2 );
res->a = b0 * didxre;
res->b = res->d = b0 * didyre; //(didyre+didxim)/2.;
res->c = b0 * didyim;
// return(res);
}
void printmat(matrix A){
std::cerr << A.a << " " << A.b << " " << A.c << " " << A.d << std::endl;
}
//
// SOA versions, vectorizable
//
//
struct matrix module_potentialDerivatives_totalGradient2_81_SOA_v2(const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos)
{
asm volatile("# module_potentialDerivatives_totalGradient2_81_SOA begins");
//std::cout << "# module_potentialDerivatives_totalGradient_81_SOA begins" << std::endl;
// 6 DP loads, i.e. 48 Bytes: position_x, position_y, ellipticity_angle, ellipticity_potential, rcore, b0
//
type_t t05;
struct matrix grad2, clump, clumpcore, clumpcut;
grad2.a = clump.a = 0;
grad2.b = clump.b = 0;
grad2.c = clump.c = 0;
grad2.d = clump.d = 0;
for(int i = shalos; i < shalos + nhalos; i++)
{
struct point true_coord, true_coord_rotation;
//True coord
true_coord.x = pImage->x - lens->position_x[i];
true_coord.y = pImage->y - lens->position_y[i];
//
//std::cerr << "start" << std::endl;
//Rotation
type_t cose = lens->anglecos[i];
type_t sine = lens->anglesin[i];
//
type_t x = true_coord.x*cose + true_coord.y*sine;
type_t y = true_coord.y*cose - true_coord.x*sine;
// 81 comput
t05 = lens->rcut[i] / (lens->rcut[i] - lens->rcore[i]);
mdci05_hpc(x, y, lens->ellipticity_potential[i], lens->rcore[i], lens->b0[i], &clumpcore);
mdci05_hpc(x, y, lens->ellipticity_potential[i], lens->rcut[i], lens->b0[i], &clumpcut);
//printf("X %f Y %f Ga: %f %f\n",true_coord.x ,true_coord.y , clumpcore.a, clumpcut.a);
//printf("X %f Y %f Gb: %f %f\n",true_coord.x ,true_coord.y, clumpcore.b, clumpcut.b);
//printf("X %f Y %f Gc: %f %f\n", true_coord.x ,true_coord.y,clumpcore.c, clumpcut.c);
//printf("X %f Y %f Gd: %f %f\n",true_coord.x ,true_coord.y, clumpcore.d, clumpcut.d);
//
clumpcore.a = t05 * (clumpcore.a - clumpcut.a);
clumpcore.b = t05 * (clumpcore.b - clumpcut.b);
clumpcore.c = t05 * (clumpcore.c - clumpcut.c);
clumpcore.d = t05 * (clumpcore.d - clumpcut.d);
//printmat(clumpcore);
//printf("X %f Y %f Ga: %f %f\n",pImage->x ,pImage->y , clumpcore.a, t05);
//printf("X %f Y %f Gb: %f %f\n",pImage->x ,pImage->y, clumpcore.b, clumpcut.b);
//printf("X %f Y %f Gc: %f %f\n",pImage->x ,pImage->y, clumpcore.c, clumpcut.c);
//printf("X %f Y %f Gd: %f %f\n",pImage->x ,pImage->y, clumpcore.d, clumpcut.d);
//rotation matrix 1
clumpcut.a = clumpcore.a * cose + clumpcore.b * -sine;
clumpcut.b = clumpcore.a * sine + clumpcore.b * cose;
clumpcut.c = clumpcore.d * sine + clumpcore.c * cose;
clumpcut.d = clumpcore.d * cose + clumpcore.c * -sine;
//printf("X %f Y %f theta: %f %f %f\n",pImage->x ,pImage->y,lens->ellipticity_angle[i], cose, sine);
//printmat(clumpcut);
//rotation matrix 2
clump.a = cose * clumpcut.a + -sine * clumpcut.d;
clump.b = cose * clumpcut.b + -sine * clumpcut.c;
clump.c = sine * clumpcut.b + cose * clumpcut.c;
clump.d = sine * clumpcut.a + cose * clumpcut.d;
//printf("X %f Y %f Ga: %f \n",pImage->x ,pImage->y , clump.a );
//printf("X %f Y %f Gb: %f \n",pImage->x ,pImage->y, clump.b );
//printf("X %f Y %f Gc: %f \n",pImage->x ,pImage->y, clump.c);
//printf("X %f Y %f Gd: %f \n",pImage->x ,pImage->y, clump.d );
//printmat(clump);
grad2.a += clump.a;
grad2.b += clump.b;
grad2.c += clump.c;
grad2.d += clump.d;
}
//
return(grad2);
}
//
//This natrix handles the calling of the gradient functions for the different type without losing time to switch or if condition
//
typedef struct matrix (*halo_g2_func_t) (const struct point *pImage, const struct Potential_SOA *lens, int shalos, int nhalos);
halo_g2_func_t halo_g2_func[100] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, module_potentialDerivatives_totalGradient2_81_SOA_v2, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
//
//
//
struct matrix module_potentialDerivatives_totalGradient2_SOA(const struct point *pImage, const struct Potential_SOA *lens, int nhalos)
{
struct matrix grad2, clumpgrad;
//
grad2.a = clumpgrad.a = 0;
grad2.b = clumpgrad.b = 0;
grad2.c = clumpgrad.c = 0;
grad2.d = clumpgrad.d = 0;
//
int shalos = 0;
while (shalos < nhalos)
{
int lens_type = lens->type[shalos];
int count = 1;
while (lens->type[shalos + count] == lens_type and shalos + count < nhalos) count++;
//std::cerr << "type = " << lens_type << " " << count << " " << shalos << " " << nhalos << " " << " func = " << halo_g2_func[lens_type] << std::endl;
//
clumpgrad = (*halo_g2_func[lens_type])(pImage, lens, shalos, count);
//
grad2.a += clumpgrad.a;
grad2.b += clumpgrad.b;
grad2.c += clumpgrad.c;
grad2.d += clumpgrad.d;
shalos += count;
}
return(grad2);
}
//
Event Timeline
Log In to Comment