Page MenuHomec4science

grid_gradient_mixed_CPU.cpp
No OneTemporary

File Metadata

Created
Wed, Oct 16, 07:52

grid_gradient_mixed_CPU.cpp

#include <stdlib.h>
#include <iomanip>
#include "grid_gradient_mixed_CPU.hpp"
#include "structure_mixed.hpp"
//#include <iacaMarks.h>
extern "C"{
double mysecond()
{
struct timeval tp;
struct timezone tzp;
//
int i = gettimeofday(&tp,&tzp);
//
return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 );
}
}
//
//
//
//#define SIS_THRESHOLD 0.0092
//
//
//
//struct point_double module_potentialDerivatives_totalGradient_5_SOA_DP_v2(const struct point_double *pImage, const struct Potential_SOA *lens, int shalos, int nhalos, int echo = false)
//inline
struct point_double module_potentialDerivatives_totalGradient_5_SOA_DP_v2(const struct point_double *pImage, const struct Potential_Mixed<double>* lens, int shalos, int nhalos, int echo = false)
{
asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins");
//printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n");
//
double one = 1.;
double threehalf = 1.5;
double half = 0.5;
//
struct point_double grad, result;
//
grad.x = (double) 0.;
grad.y = (double) 0.;
//int i = 0*shalos;
//
//IACA_START;
for(int i = shalos; i < shalos + nhalos; i++)
{
//
double b0 = lens->b0[i];
struct point_double true_coord, true_coord_rotation;
//
true_coord.x = pImage->x - lens->position_x[i];
true_coord.y = pImage->y - lens->position_y[i];
//
//true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
double cose = lens->anglecos[i];
double sine = lens->anglesin[i];
//
double x = true_coord.x*cose + true_coord.y*sine;
double y = true_coord.y*cose - true_coord.x*sine;
//
double ell_pot = lens->ellipticity_potential[i];
//
double val = x*x*(one - ell_pot) + y*y*(one + ell_pot);
//
double R = 1.f/sqrtf(val);
R = R*(threehalf - half*val*R*R);
//
result.x = (one - ell_pot)*b0*x*R;
result.y = (one + ell_pot)*b0*y*R;
//double R = sqrt(x*x*(1 - ell_pot) + y*y*(1 + ell_pot));
//
//result.x = (1 - ell_pot)*lens->b0[i]*x/R;
//result.y = (1 + ell_pot)*lens->b0[i]*y/R;
//
grad.x += result.x*cose - result.y*sine;
grad.y += result.y*cose + result.x*sine;
//if (echo) printf("coord = %.15f %.15f, x = %.15f %.15f, ell_pot = %.15f, res = %.15f %.15f\n", true_coord.x, true_coord.y, x, y, ell_pot, result.x, result.y);
}
//IACA_END;
return grad;
}
//
//
//
struct point_double module_potentialDerivatives_totalGradient_5_SOA_DP_v2(const struct point_double *pImage, const struct Potential_SOA *lens, int shalos, int nhalos, int echo = false)
{
asm volatile("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins");
//printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n");
//
double one = 1.;
double threehalf = 1.5;
double half = 0.5;
//
struct point_double grad, result;
//
grad.x = (double) 0.;
grad.y = (double) 0.;
//
for(int i = shalos; i < shalos + nhalos; i++)
{
//
double b0 = lens->b0[i];
struct point_double true_coord, true_coord_rotation;
//
true_coord.x = pImage->x - lens->position_x[i];
true_coord.y = pImage->y - lens->position_y[i];
//
//true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
double cose = lens->anglecos[i];
double sine = lens->anglesin[i];
//
double x = true_coord.x*cose + true_coord.y*sine;
double y = true_coord.y*cose - true_coord.x*sine;
//
double ell_pot = lens->ellipticity_potential[i];
//
double val = x*x*(one - ell_pot) + y*y*(one + ell_pot);
//
double R = 1.f/sqrtf(val);
R = R*(threehalf - half*val*R*R);
//
result.x = (one - ell_pot)*b0*x*R;
result.y = (one + ell_pot)*b0*y*R;
//double R = sqrt(x*x*(1 - ell_pot) + y*y*(1 + ell_pot));
//
//result.x = (1 - ell_pot)*lens->b0[i]*x/R;
//result.y = (1 + ell_pot)*lens->b0[i]*y/R;
//
grad.x += result.x*cose - result.y*sine;
grad.y += result.y*cose + result.x*sine;
//if (echo) printf("coord = %.15f %.15f, x = %.15f %.15f, ell_pot = %.15f, res = %.15f %.15f\n", true_coord.x, true_coord.y, x, y, ell_pot, result.x, result.y);
}
return grad;
}
//
//
//
//void gradient_grid_general_double_CPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens, int nbgridcells, const struct Potential_SOA *lens, int istart, int jstart)
void gradient_grid_general_double_CPU(double* __restrict__ grid_grad_x, double* __restrict__ grid_grad_y, const struct grid_param *frame, int Nlens, int nbgridcells, const struct Potential_Mixed<double> *lens, int istart, int jstart)
{
//printf("gradient_grid_general_double_CPU Potential_Mixed<double> %d %d\n", istart, jstart); fflush(stdout);
const int grid_dim = nbgridcells;
//
//const type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
//const type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
const double dx = (frame->xmax - frame->xmin)/(nbgridcells - 1);
const double dy = (frame->ymax - frame->ymin)/(nbgridcells - 1);
//
#pragma omp parallel
#pragma omp for
for (int jj = jstart; jj < nbgridcells; ++jj)
for (int ii = istart; ii < nbgridcells; ++ii)
{
int index = jj*nbgridcells + ii;
//
point_double image_point;
image_point.x = frame->xmin + ii*dx;
image_point.y = frame->ymin + jj*dy;
//Grad = module_potentialDerivatives_totalGradient_SOA(&image_point, lens, 0, Nlens);
//printf("%d %d: %d ", ii, jj, index);
point_double Grad_DP = module_potentialDerivatives_totalGradient_5_SOA_DP_v2(&image_point, lens, 0, Nlens, (ii == 0) && (jj == 0));
//if (index == 0) std::cout << "** " << index << " " << ii << " " << jj << " " << std::setprecision(15) << image_point.x << " " << image_point.y << ": " << Grad_DP.x << " " << Grad_DP.y << std::endl;
//
grid_grad_x[index] = (double) Grad_DP.x;
grid_grad_y[index] = (double) Grad_DP.y;
//
}
}
//
//
//
void gradient_grid_general_double_CPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens, int nbgridcells, const struct Potential_SOA *lens, int istart, int jstart)
{
const int grid_dim = nbgridcells;
//
//const type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
//const type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
const double dx = (frame->xmax - frame->xmin)/(nbgridcells - 1);
const double dy = (frame->ymax - frame->ymin)/(nbgridcells - 1);
//
#pragma omp parallel
#pragma omp for
for (int jj = jstart; jj < nbgridcells; ++jj)
for (int ii = istart; ii < nbgridcells; ++ii)
{
int index = jj*nbgridcells + ii;
//
point_double image_point;
image_point.x = frame->xmin + ii*dx;
image_point.y = frame->ymin + jj*dy;
//Grad = module_potentialDerivatives_totalGradient_SOA(&image_point, lens, 0, Nlens);
//printf("%d %d: %d ", ii, jj, index);
point_double Grad_DP = module_potentialDerivatives_totalGradient_5_SOA_DP_v2(&image_point, lens, 0, Nlens, 0);
//std::cout << "** " << index << " " << ii << " " << jj << " " << std::setprecision(15) << image_point.x << " " << image_point.y<< ": " << Grad_DP.x << " " << Grad_DP.y << std::endl;
//
grid_grad_x[index] = (double) Grad_DP.x;
grid_grad_y[index] = (double) Grad_DP.y;
//
}
}
//
//
//
void
gradient_grid_euler(double* grid_grad_x, double* grid_grad_y, const double* grid_grad_temp_x, const double* grid_grad_temp_y, const struct grid_param *frame, const struct Potential_SOA *lens, int Nlens, int nbgridcells_x, int nbgridcells_y, int istart, int jstart, int* count)
//gradient_grid_euler(double* grid_grad_temp_x, double* grid_grad_temp_y, double* grid_grad_x, double* grid_grad_y, const struct grid_param *frame, const struct Potential_Mixed<double> *lens, int Nlens, int nbgridcells_x, int nbgridcells_y, int istart, int jstart, int* count)
{
int loc_count = 0;
double time = -mysecond();
//
{
const double dx = (frame->xmax - frame->xmin)/(nbgridcells_x - 1);
const double dy = (frame->ymax - frame->ymin)/(nbgridcells_y - 1);
#pragma omp parallel
#pragma omp for reduction (+: loc_count)
//#pragma omp single
for (int jj = jstart + 1; jj < nbgridcells_y; ++jj)
{
grid_grad_x[jj*nbgridcells_y] = grid_grad_temp_x[jj*nbgridcells_y];
grid_grad_y[jj*nbgridcells_y] = grid_grad_temp_y[jj*nbgridcells_y];
//#pragma omp task
for (int ii = istart + 1; ii < nbgridcells_x; ++ii)
{
{
int index = (jj - 0)*nbgridcells_x + (ii - 0);
int index_north = (jj - 1)*nbgridcells_x + (ii - 0);
int index_west = (jj - 0)*nbgridcells_x + (ii - 1);
//
type_t grad_north_x = grid_grad_temp_x[index] - grid_grad_temp_x[index_north];
type_t grad_north_y = grid_grad_temp_y[index] - grid_grad_temp_y[index_north];
//
type_t grad_west_x = grid_grad_temp_x[index] - grid_grad_temp_x[index_west];
type_t grad_west_y = grid_grad_temp_y[index] - grid_grad_temp_y[index_west];
//
int c1 = fabs(dx - grad_north_x) < SIS_THRESHOLD;
int c2 = fabs(dx - grad_west_x) < SIS_THRESHOLD;
int c3 = fabs(dy - grad_north_y) < SIS_THRESHOLD;
int c4 = fabs(dy - grad_west_y) < SIS_THRESHOLD;
//
//if (c1 || c2 || c3 || c4)
if (c2 || c3)
{
struct point_double pImage;
//
pImage.x = (double) frame->xmin + ii*dx;
pImage.y = (double) frame->ymin + jj*dy;
//
//point_double Grad_DP = module_potentialDerivatives_totalGradient_5_SOA_DP_v2(&pImage, lens, 0, Nlens, ((ii == 1) && (jj == 175)));
point_double Grad_DP = module_potentialDerivatives_totalGradient_5_SOA_DP_v2(&pImage, lens, 0, Nlens, true);
//
grid_grad_x[index] = Grad_DP.x;
grid_grad_y[index] = Grad_DP.y;
loc_count++;
//if (ii == 1) printf("%d %d: %f %f -> %f %f\n", ii, jj, pImage.x, pImage.y, Grad_DP.x, Grad_DP.y);
}
else
{
grid_grad_x[index] = grid_grad_temp_x[index];
grid_grad_y[index] = grid_grad_temp_y[index];
}
}
}
}
}
*count = loc_count;
//#pragma omp taskwait
}
//
//
//
void gradient_grid_SIS_patch(double* __restrict__ grid_grad_sis_x, double* __restrict__ grid_grad_sis_y, double* __restrict__ grid_grad_x, double* __restrict__ grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int Nlens, int nbgridcells_x, int nbgridcells_y, int istart, int jstart, int* count)
//void gradient_grid_SIS_patch(double* grid_grad_x, double* grid_grad_y, const struct grid_param *frame, const struct Potential_Mixed<double> *lens, int Nlens, int nbgridcells_x, int nbgridcells_y, int istart, int jstart, int* count)
{
//
// SIS Potentials in double precision
//
printf(" ** CPU SIS patch\n"); fflush(stdout);
*count = 0;
for (int pot = 0; pot < Nlens; ++pot)
{
int patch_size = 20;
if (lens->vdisp[pot] > 900.)
{
patch_size = 200;
}
const double dx = (frame->xmax - frame->xmin)/(nbgridcells_x - 1);
const double dy = (frame->ymax - frame->ymin)/(nbgridcells_y - 1);
//
const int px = (int) ((lens->position_x[pot] - frame->xmin)/dx);
const int py = (int) ((lens->position_y[pot] - frame->ymin)/dy);
(*count)++;
//
int istart = px - patch_size/2;
int jstart = py - patch_size/2;
{
const double dx = (frame->xmax - frame->xmin)/(nbgridcells_x - 1);
const double dy = (frame->ymax - frame->ymin)/(nbgridcells_y - 1);
#pragma omp parallel
#pragma omp for collapse(2)
for (int jj = 0; jj < patch_size; ++jj)
for (int ii = 0; ii < patch_size; ++ii)
{
//
int index = (jj + jstart)*nbgridcells_x + (ii + istart);
//
struct point_double image_point_DP;
image_point_DP.x = (double) frame->xmin + (ii + istart)*dx;
image_point_DP.y = (double) frame->ymin + (jj + jstart)*dy;
//
point_double Grad_DP = module_potentialDerivatives_totalGradient_5_SOA_DP_v2(&image_point_DP, lens, 0, Nlens, 0);
//
// needs checking?
//
//printf("%.15f %.15f\n", Grad_DP.x, Grad_DP.y);
grid_grad_sis_x[index] = Grad_DP.x;
grid_grad_sis_y[index] = Grad_DP.y;
}
}
}
}
void gradient_grid_SIS_patch(double* grid_grad_x, double* grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int Nlens, int nbgridcells_x, int nbgridcells_y, int istart, int jstart, int* count)
{
gradient_grid_SIS_patch(grid_grad_x, grid_grad_y, grid_grad_x, grid_grad_y, frame, lens, Nlens, nbgridcells_x, nbgridcells_y, istart, jstart, count);
}
//
//
//
void gradient_grid_general_mixed_CPU(double* grid_grad_x, double* grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int Nlens, int nbgridcells_x, int nbgridcells_y, int istart, int jstart)
{
const int grid_dim = nbgridcells_x;
//
struct Potential_Mixed<double> lens_mixed;
alloc(lens_mixed, Nlens);
copy (lens_mixed, lens, Nlens);
//
//point image_point;
//
const type_t dx = (frame->xmax - frame->xmin)/(nbgridcells_x - 1);
const type_t dy = (frame->ymax - frame->ymin)/(nbgridcells_y - 1);
//
double* grid_grad_temp_x = (double*) malloc(nbgridcells_x*nbgridcells_y*sizeof(double));
double* grid_grad_temp_y = (double*) malloc(nbgridcells_x*nbgridcells_y*sizeof(double));
double time;
//
//printf("MIXED -> dx = %.15f, dy = %.15f, nbgridcells_x = %d, nbgridcells_y = %d\n", dx, dy, nbgridcells_x, nbgridcells_y);
//
// Float computation
//
#if 1
int count = 0;
time = -mysecond();
#pragma omp parallel
#pragma omp for
for (int jj = jstart; jj < nbgridcells_y; ++jj)
for (int ii = istart; ii < nbgridcells_x; ++ii)
{
int index = jj*nbgridcells_x + ii;
//
struct point image_point;
image_point.x = frame->xmin + (ii + istart)*dx;
image_point.y = frame->ymin + (jj + jstart)*dy;
//
point Grad = module_potentialDerivatives_totalGradient_SOA(&image_point, lens, Nlens);
//
grid_grad_temp_x[index] = (double) Grad.x;
grid_grad_temp_y[index] = (double) Grad.y;
//
}
memcpy(grid_grad_x, grid_grad_temp_x, nbgridcells_x*sizeof(double));
memcpy(grid_grad_y, grid_grad_temp_y, nbgridcells_y*sizeof(double));
#endif
time += mysecond();
printf("\n** Float computation: %f s.\n", time);
//
//
//
#ifdef __SIS
#warning "SIS CPU"
//
// SIS Potentials in double precision
//
count = 0;
time = -mysecond();
{
gradient_grid_SIS_patch(grid_grad_temp_x, grid_grad_temp_y, frame, lens, Nlens, nbgridcells_x, nbgridcells_y, istart, jstart, &count);
}
time += mysecond();
printf("** %d SIS computations: %f s.\n", count, time);
#endif
//
// Euler approximation
//
//#ifdef __EULER
#if __EULER
#warning "EULER CPU"
//memset(grid_grad_x, nbgridcells_x*nbgridcells_y*sizeof(double));
//memset(grid_grad_y, nbgridcells_x*nbgridcells_y*sizeof(double));
count = 0;
time = -mysecond();
{
//gradient_grid_euler(grid_grad_temp_x, grid_grad_temp_y, grid_grad_x, grid_grad_y, frame, &lens_mixed, Nlens, nbgridcells_x, nbgridcells_y, istart, jstart, &count);
gradient_grid_euler(grid_grad_x, grid_grad_y, grid_grad_temp_x, grid_grad_temp_y, frame, lens, Nlens, nbgridcells_x, nbgridcells_y, istart, jstart, &count);
}
time += mysecond();
printf("** %d Euler computations: %f s.\n", count, time);
#else
memcpy(grid_grad_x, grid_grad_temp_x, nbgridcells_x*nbgridcells_y*sizeof(double));
memcpy(grid_grad_y, grid_grad_temp_y, nbgridcells_x*nbgridcells_y*sizeof(double));
#endif
}
void gradient_grid_mixed_CPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, const struct Potential_SOA *lens, int nhalos, int nbgridcells_x, int nbgridcells_y, int istart, int jstart)
{
double dx = (frame->xmax - frame->xmin)/(nbgridcells_x - 1);
double dy = (frame->ymax - frame->ymin)/(nbgridcells_y - 1);
//
gradient_grid_general_mixed_CPU(grid_grad_x, grid_grad_y, frame, lens, nhalos, nbgridcells_x, nbgridcells_y, istart, jstart);
//
}

Event Timeline