Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F88013551
grid_gradient_mixed_CPU.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Oct 16, 07:52
Size
19 KB
Mime Type
text/x-c
Expires
Fri, Oct 18, 07:52 (2 d)
Engine
blob
Format
Raw Data
Handle
21700604
Attached To
R1448 Lenstool-HPC
grid_gradient_mixed_CPU.cpp
View Options
#include <stdlib.h>
#include <iomanip>
#include "grid_gradient_mixed_CPU.hpp"
#include "structure_mixed.hpp"
//#include <iacaMarks.h>
extern
"C"
{
double
mysecond
()
{
struct
timeval
tp
;
struct
timezone
tzp
;
//
int
i
=
gettimeofday
(
&
tp
,
&
tzp
);
//
return
(
(
double
)
tp
.
tv_sec
+
(
double
)
tp
.
tv_usec
*
1.e-6
);
}
}
//
//
//
//#define SIS_THRESHOLD 0.0092
//
//
//
//struct point_double module_potentialDerivatives_totalGradient_5_SOA_DP_v2(const struct point_double *pImage, const struct Potential_SOA *lens, int shalos, int nhalos, int echo = false)
//inline
struct
point_double
module_potentialDerivatives_totalGradient_5_SOA_DP_v2
(
const
struct
point_double
*
pImage
,
const
struct
Potential_Mixed
<
double
>*
lens
,
int
shalos
,
int
nhalos
,
int
echo
=
false
)
{
asm
volatile
(
"# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins"
);
//printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n");
//
double
one
=
1.
;
double
threehalf
=
1.5
;
double
half
=
0.5
;
//
struct
point_double
grad
,
result
;
//
grad
.
x
=
(
double
)
0.
;
grad
.
y
=
(
double
)
0.
;
//int i = 0*shalos;
//
//IACA_START;
for
(
int
i
=
shalos
;
i
<
shalos
+
nhalos
;
i
++
)
{
//
double
b0
=
lens
->
b0
[
i
];
struct
point_double
true_coord
,
true_coord_rotation
;
//
true_coord
.
x
=
pImage
->
x
-
lens
->
position_x
[
i
];
true_coord
.
y
=
pImage
->
y
-
lens
->
position_y
[
i
];
//
//true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
double
cose
=
lens
->
anglecos
[
i
];
double
sine
=
lens
->
anglesin
[
i
];
//
double
x
=
true_coord
.
x
*
cose
+
true_coord
.
y
*
sine
;
double
y
=
true_coord
.
y
*
cose
-
true_coord
.
x
*
sine
;
//
double
ell_pot
=
lens
->
ellipticity_potential
[
i
];
//
double
val
=
x
*
x
*
(
one
-
ell_pot
)
+
y
*
y
*
(
one
+
ell_pot
);
//
double
R
=
1.f
/
sqrtf
(
val
);
R
=
R
*
(
threehalf
-
half
*
val
*
R
*
R
);
//
result
.
x
=
(
one
-
ell_pot
)
*
b0
*
x
*
R
;
result
.
y
=
(
one
+
ell_pot
)
*
b0
*
y
*
R
;
//double R = sqrt(x*x*(1 - ell_pot) + y*y*(1 + ell_pot));
//
//result.x = (1 - ell_pot)*lens->b0[i]*x/R;
//result.y = (1 + ell_pot)*lens->b0[i]*y/R;
//
grad
.
x
+=
result
.
x
*
cose
-
result
.
y
*
sine
;
grad
.
y
+=
result
.
y
*
cose
+
result
.
x
*
sine
;
//if (echo) printf("coord = %.15f %.15f, x = %.15f %.15f, ell_pot = %.15f, res = %.15f %.15f\n", true_coord.x, true_coord.y, x, y, ell_pot, result.x, result.y);
}
//IACA_END;
return
grad
;
}
//
//
//
struct
point_double
module_potentialDerivatives_totalGradient_5_SOA_DP_v2
(
const
struct
point_double
*
pImage
,
const
struct
Potential_SOA
*
lens
,
int
shalos
,
int
nhalos
,
int
echo
=
false
)
{
asm
volatile
(
"# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins"
);
//printf("# module_potentialDerivatives_totalGradient_SIS_SOA_v2 begins\n");
//
double
one
=
1.
;
double
threehalf
=
1.5
;
double
half
=
0.5
;
//
struct
point_double
grad
,
result
;
//
grad
.
x
=
(
double
)
0.
;
grad
.
y
=
(
double
)
0.
;
//
for
(
int
i
=
shalos
;
i
<
shalos
+
nhalos
;
i
++
)
{
//
double
b0
=
lens
->
b0
[
i
];
struct
point_double
true_coord
,
true_coord_rotation
;
//
true_coord
.
x
=
pImage
->
x
-
lens
->
position_x
[
i
];
true_coord
.
y
=
pImage
->
y
-
lens
->
position_y
[
i
];
//
//true_coord_rotation = rotateCoordinateSystem(true_coord, lens->ellipticity_angle[i]);
double
cose
=
lens
->
anglecos
[
i
];
double
sine
=
lens
->
anglesin
[
i
];
//
double
x
=
true_coord
.
x
*
cose
+
true_coord
.
y
*
sine
;
double
y
=
true_coord
.
y
*
cose
-
true_coord
.
x
*
sine
;
//
double
ell_pot
=
lens
->
ellipticity_potential
[
i
];
//
double
val
=
x
*
x
*
(
one
-
ell_pot
)
+
y
*
y
*
(
one
+
ell_pot
);
//
double
R
=
1.f
/
sqrtf
(
val
);
R
=
R
*
(
threehalf
-
half
*
val
*
R
*
R
);
//
result
.
x
=
(
one
-
ell_pot
)
*
b0
*
x
*
R
;
result
.
y
=
(
one
+
ell_pot
)
*
b0
*
y
*
R
;
//double R = sqrt(x*x*(1 - ell_pot) + y*y*(1 + ell_pot));
//
//result.x = (1 - ell_pot)*lens->b0[i]*x/R;
//result.y = (1 + ell_pot)*lens->b0[i]*y/R;
//
grad
.
x
+=
result
.
x
*
cose
-
result
.
y
*
sine
;
grad
.
y
+=
result
.
y
*
cose
+
result
.
x
*
sine
;
//if (echo) printf("coord = %.15f %.15f, x = %.15f %.15f, ell_pot = %.15f, res = %.15f %.15f\n", true_coord.x, true_coord.y, x, y, ell_pot, result.x, result.y);
}
return
grad
;
}
//
//
//
//void gradient_grid_general_double_CPU(double *grid_grad_x, double *grid_grad_y, const struct grid_param *frame, int Nlens, int nbgridcells, const struct Potential_SOA *lens, int istart, int jstart)
void
gradient_grid_general_double_CPU
(
double
*
__restrict__
grid_grad_x
,
double
*
__restrict__
grid_grad_y
,
const
struct
grid_param
*
frame
,
int
Nlens
,
int
nbgridcells
,
const
struct
Potential_Mixed
<
double
>
*
lens
,
int
istart
,
int
jstart
)
{
//printf("gradient_grid_general_double_CPU Potential_Mixed<double> %d %d\n", istart, jstart); fflush(stdout);
const
int
grid_dim
=
nbgridcells
;
//
//const type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
//const type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
const
double
dx
=
(
frame
->
xmax
-
frame
->
xmin
)
/
(
nbgridcells
-
1
);
const
double
dy
=
(
frame
->
ymax
-
frame
->
ymin
)
/
(
nbgridcells
-
1
);
//
#pragma omp parallel
#pragma omp for
for
(
int
jj
=
jstart
;
jj
<
nbgridcells
;
++
jj
)
for
(
int
ii
=
istart
;
ii
<
nbgridcells
;
++
ii
)
{
int
index
=
jj
*
nbgridcells
+
ii
;
//
point_double
image_point
;
image_point
.
x
=
frame
->
xmin
+
ii
*
dx
;
image_point
.
y
=
frame
->
ymin
+
jj
*
dy
;
//Grad = module_potentialDerivatives_totalGradient_SOA(&image_point, lens, 0, Nlens);
//printf("%d %d: %d ", ii, jj, index);
point_double
Grad_DP
=
module_potentialDerivatives_totalGradient_5_SOA_DP_v2
(
&
image_point
,
lens
,
0
,
Nlens
,
(
ii
==
0
)
&&
(
jj
==
0
));
//if (index == 0) std::cout << "** " << index << " " << ii << " " << jj << " " << std::setprecision(15) << image_point.x << " " << image_point.y << ": " << Grad_DP.x << " " << Grad_DP.y << std::endl;
//
grid_grad_x
[
index
]
=
(
double
)
Grad_DP
.
x
;
grid_grad_y
[
index
]
=
(
double
)
Grad_DP
.
y
;
//
}
}
//
//
//
void
gradient_grid_general_double_CPU
(
double
*
grid_grad_x
,
double
*
grid_grad_y
,
const
struct
grid_param
*
frame
,
int
Nlens
,
int
nbgridcells
,
const
struct
Potential_SOA
*
lens
,
int
istart
,
int
jstart
)
{
const
int
grid_dim
=
nbgridcells
;
//
//const type_t dx = (frame->xmax - frame->xmin)/(nbgridcells-1);
//const type_t dy = (frame->ymax - frame->ymin)/(nbgridcells-1);
const
double
dx
=
(
frame
->
xmax
-
frame
->
xmin
)
/
(
nbgridcells
-
1
);
const
double
dy
=
(
frame
->
ymax
-
frame
->
ymin
)
/
(
nbgridcells
-
1
);
//
#pragma omp parallel
#pragma omp for
for
(
int
jj
=
jstart
;
jj
<
nbgridcells
;
++
jj
)
for
(
int
ii
=
istart
;
ii
<
nbgridcells
;
++
ii
)
{
int
index
=
jj
*
nbgridcells
+
ii
;
//
point_double
image_point
;
image_point
.
x
=
frame
->
xmin
+
ii
*
dx
;
image_point
.
y
=
frame
->
ymin
+
jj
*
dy
;
//Grad = module_potentialDerivatives_totalGradient_SOA(&image_point, lens, 0, Nlens);
//printf("%d %d: %d ", ii, jj, index);
point_double
Grad_DP
=
module_potentialDerivatives_totalGradient_5_SOA_DP_v2
(
&
image_point
,
lens
,
0
,
Nlens
,
0
);
//std::cout << "** " << index << " " << ii << " " << jj << " " << std::setprecision(15) << image_point.x << " " << image_point.y<< ": " << Grad_DP.x << " " << Grad_DP.y << std::endl;
//
grid_grad_x
[
index
]
=
(
double
)
Grad_DP
.
x
;
grid_grad_y
[
index
]
=
(
double
)
Grad_DP
.
y
;
//
}
}
//
//
//
void
gradient_grid_euler
(
double
*
grid_grad_x
,
double
*
grid_grad_y
,
const
double
*
grid_grad_temp_x
,
const
double
*
grid_grad_temp_y
,
const
struct
grid_param
*
frame
,
const
struct
Potential_SOA
*
lens
,
int
Nlens
,
int
nbgridcells_x
,
int
nbgridcells_y
,
int
istart
,
int
jstart
,
int
*
count
)
//gradient_grid_euler(double* grid_grad_temp_x, double* grid_grad_temp_y, double* grid_grad_x, double* grid_grad_y, const struct grid_param *frame, const struct Potential_Mixed<double> *lens, int Nlens, int nbgridcells_x, int nbgridcells_y, int istart, int jstart, int* count)
{
int
loc_count
=
0
;
double
time
=
-
mysecond
();
//
{
const
double
dx
=
(
frame
->
xmax
-
frame
->
xmin
)
/
(
nbgridcells_x
-
1
);
const
double
dy
=
(
frame
->
ymax
-
frame
->
ymin
)
/
(
nbgridcells_y
-
1
);
#pragma omp parallel
#pragma omp for reduction (+: loc_count)
//#pragma omp single
for
(
int
jj
=
jstart
+
1
;
jj
<
nbgridcells_y
;
++
jj
)
{
grid_grad_x
[
jj
*
nbgridcells_y
]
=
grid_grad_temp_x
[
jj
*
nbgridcells_y
];
grid_grad_y
[
jj
*
nbgridcells_y
]
=
grid_grad_temp_y
[
jj
*
nbgridcells_y
];
//#pragma omp task
for
(
int
ii
=
istart
+
1
;
ii
<
nbgridcells_x
;
++
ii
)
{
{
int
index
=
(
jj
-
0
)
*
nbgridcells_x
+
(
ii
-
0
);
int
index_north
=
(
jj
-
1
)
*
nbgridcells_x
+
(
ii
-
0
);
int
index_west
=
(
jj
-
0
)
*
nbgridcells_x
+
(
ii
-
1
);
//
type_t
grad_north_x
=
grid_grad_temp_x
[
index
]
-
grid_grad_temp_x
[
index_north
];
type_t
grad_north_y
=
grid_grad_temp_y
[
index
]
-
grid_grad_temp_y
[
index_north
];
//
type_t
grad_west_x
=
grid_grad_temp_x
[
index
]
-
grid_grad_temp_x
[
index_west
];
type_t
grad_west_y
=
grid_grad_temp_y
[
index
]
-
grid_grad_temp_y
[
index_west
];
//
int
c1
=
fabs
(
dx
-
grad_north_x
)
<
SIS_THRESHOLD
;
int
c2
=
fabs
(
dx
-
grad_west_x
)
<
SIS_THRESHOLD
;
int
c3
=
fabs
(
dy
-
grad_north_y
)
<
SIS_THRESHOLD
;
int
c4
=
fabs
(
dy
-
grad_west_y
)
<
SIS_THRESHOLD
;
//
//if (c1 || c2 || c3 || c4)
if
(
c2
||
c3
)
{
struct
point_double
pImage
;
//
pImage
.
x
=
(
double
)
frame
->
xmin
+
ii
*
dx
;
pImage
.
y
=
(
double
)
frame
->
ymin
+
jj
*
dy
;
//
//point_double Grad_DP = module_potentialDerivatives_totalGradient_5_SOA_DP_v2(&pImage, lens, 0, Nlens, ((ii == 1) && (jj == 175)));
point_double
Grad_DP
=
module_potentialDerivatives_totalGradient_5_SOA_DP_v2
(
&
pImage
,
lens
,
0
,
Nlens
,
true
);
//
grid_grad_x
[
index
]
=
Grad_DP
.
x
;
grid_grad_y
[
index
]
=
Grad_DP
.
y
;
loc_count
++
;
//if (ii == 1) printf("%d %d: %f %f -> %f %f\n", ii, jj, pImage.x, pImage.y, Grad_DP.x, Grad_DP.y);
}
else
{
grid_grad_x
[
index
]
=
grid_grad_temp_x
[
index
];
grid_grad_y
[
index
]
=
grid_grad_temp_y
[
index
];
}
}
}
}
}
*
count
=
loc_count
;
//#pragma omp taskwait
}
//
//
//
void
gradient_grid_SIS_patch
(
double
*
__restrict__
grid_grad_sis_x
,
double
*
__restrict__
grid_grad_sis_y
,
double
*
__restrict__
grid_grad_x
,
double
*
__restrict__
grid_grad_y
,
const
struct
grid_param
*
frame
,
const
struct
Potential_SOA
*
lens
,
int
Nlens
,
int
nbgridcells_x
,
int
nbgridcells_y
,
int
istart
,
int
jstart
,
int
*
count
)
//void gradient_grid_SIS_patch(double* grid_grad_x, double* grid_grad_y, const struct grid_param *frame, const struct Potential_Mixed<double> *lens, int Nlens, int nbgridcells_x, int nbgridcells_y, int istart, int jstart, int* count)
{
//
// SIS Potentials in double precision
//
printf
(
" ** CPU SIS patch
\n
"
);
fflush
(
stdout
);
*
count
=
0
;
for
(
int
pot
=
0
;
pot
<
Nlens
;
++
pot
)
{
int
patch_size
=
20
;
if
(
lens
->
vdisp
[
pot
]
>
900.
)
{
patch_size
=
200
;
}
const
double
dx
=
(
frame
->
xmax
-
frame
->
xmin
)
/
(
nbgridcells_x
-
1
);
const
double
dy
=
(
frame
->
ymax
-
frame
->
ymin
)
/
(
nbgridcells_y
-
1
);
//
const
int
px
=
(
int
)
((
lens
->
position_x
[
pot
]
-
frame
->
xmin
)
/
dx
);
const
int
py
=
(
int
)
((
lens
->
position_y
[
pot
]
-
frame
->
ymin
)
/
dy
);
(
*
count
)
++
;
//
int
istart
=
px
-
patch_size
/
2
;
int
jstart
=
py
-
patch_size
/
2
;
{
const
double
dx
=
(
frame
->
xmax
-
frame
->
xmin
)
/
(
nbgridcells_x
-
1
);
const
double
dy
=
(
frame
->
ymax
-
frame
->
ymin
)
/
(
nbgridcells_y
-
1
);
#pragma omp parallel
#pragma omp for collapse(2)
for
(
int
jj
=
0
;
jj
<
patch_size
;
++
jj
)
for
(
int
ii
=
0
;
ii
<
patch_size
;
++
ii
)
{
//
int
index
=
(
jj
+
jstart
)
*
nbgridcells_x
+
(
ii
+
istart
);
//
struct
point_double
image_point_DP
;
image_point_DP
.
x
=
(
double
)
frame
->
xmin
+
(
ii
+
istart
)
*
dx
;
image_point_DP
.
y
=
(
double
)
frame
->
ymin
+
(
jj
+
jstart
)
*
dy
;
//
point_double
Grad_DP
=
module_potentialDerivatives_totalGradient_5_SOA_DP_v2
(
&
image_point_DP
,
lens
,
0
,
Nlens
,
0
);
//
// needs checking?
//
//printf("%.15f %.15f\n", Grad_DP.x, Grad_DP.y);
grid_grad_sis_x
[
index
]
=
Grad_DP
.
x
;
grid_grad_sis_y
[
index
]
=
Grad_DP
.
y
;
}
}
}
}
void
gradient_grid_SIS_patch
(
double
*
grid_grad_x
,
double
*
grid_grad_y
,
const
struct
grid_param
*
frame
,
const
struct
Potential_SOA
*
lens
,
int
Nlens
,
int
nbgridcells_x
,
int
nbgridcells_y
,
int
istart
,
int
jstart
,
int
*
count
)
{
gradient_grid_SIS_patch
(
grid_grad_x
,
grid_grad_y
,
grid_grad_x
,
grid_grad_y
,
frame
,
lens
,
Nlens
,
nbgridcells_x
,
nbgridcells_y
,
istart
,
jstart
,
count
);
}
//
//
//
void
gradient_grid_general_mixed_CPU
(
double
*
grid_grad_x
,
double
*
grid_grad_y
,
const
struct
grid_param
*
frame
,
const
struct
Potential_SOA
*
lens
,
int
Nlens
,
int
nbgridcells_x
,
int
nbgridcells_y
,
int
istart
,
int
jstart
)
{
const
int
grid_dim
=
nbgridcells_x
;
//
struct
Potential_Mixed
<
double
>
lens_mixed
;
alloc
(
lens_mixed
,
Nlens
);
copy
(
lens_mixed
,
lens
,
Nlens
);
//
//point image_point;
//
const
type_t
dx
=
(
frame
->
xmax
-
frame
->
xmin
)
/
(
nbgridcells_x
-
1
);
const
type_t
dy
=
(
frame
->
ymax
-
frame
->
ymin
)
/
(
nbgridcells_y
-
1
);
//
double
*
grid_grad_temp_x
=
(
double
*
)
malloc
(
nbgridcells_x
*
nbgridcells_y
*
sizeof
(
double
));
double
*
grid_grad_temp_y
=
(
double
*
)
malloc
(
nbgridcells_x
*
nbgridcells_y
*
sizeof
(
double
));
double
time
;
//
//printf("MIXED -> dx = %.15f, dy = %.15f, nbgridcells_x = %d, nbgridcells_y = %d\n", dx, dy, nbgridcells_x, nbgridcells_y);
//
// Float computation
//
#if 1
int
count
=
0
;
time
=
-
mysecond
();
#pragma omp parallel
#pragma omp for
for
(
int
jj
=
jstart
;
jj
<
nbgridcells_y
;
++
jj
)
for
(
int
ii
=
istart
;
ii
<
nbgridcells_x
;
++
ii
)
{
int
index
=
jj
*
nbgridcells_x
+
ii
;
//
struct
point
image_point
;
image_point
.
x
=
frame
->
xmin
+
(
ii
+
istart
)
*
dx
;
image_point
.
y
=
frame
->
ymin
+
(
jj
+
jstart
)
*
dy
;
//
point
Grad
=
module_potentialDerivatives_totalGradient_SOA
(
&
image_point
,
lens
,
Nlens
);
//
grid_grad_temp_x
[
index
]
=
(
double
)
Grad
.
x
;
grid_grad_temp_y
[
index
]
=
(
double
)
Grad
.
y
;
//
}
memcpy
(
grid_grad_x
,
grid_grad_temp_x
,
nbgridcells_x
*
sizeof
(
double
));
memcpy
(
grid_grad_y
,
grid_grad_temp_y
,
nbgridcells_y
*
sizeof
(
double
));
#endif
time
+=
mysecond
();
printf
(
"
\n
** Float computation: %f s.
\n
"
,
time
);
//
//
//
#ifdef __SIS
#warning "SIS CPU"
//
// SIS Potentials in double precision
//
count
=
0
;
time
=
-
mysecond
();
{
gradient_grid_SIS_patch
(
grid_grad_temp_x
,
grid_grad_temp_y
,
frame
,
lens
,
Nlens
,
nbgridcells_x
,
nbgridcells_y
,
istart
,
jstart
,
&
count
);
}
time
+=
mysecond
();
printf
(
"** %d SIS computations: %f s.
\n
"
,
count
,
time
);
#endif
//
// Euler approximation
//
//#ifdef __EULER
#if __EULER
#warning "EULER CPU"
//memset(grid_grad_x, nbgridcells_x*nbgridcells_y*sizeof(double));
//memset(grid_grad_y, nbgridcells_x*nbgridcells_y*sizeof(double));
count
=
0
;
time
=
-
mysecond
();
{
//gradient_grid_euler(grid_grad_temp_x, grid_grad_temp_y, grid_grad_x, grid_grad_y, frame, &lens_mixed, Nlens, nbgridcells_x, nbgridcells_y, istart, jstart, &count);
gradient_grid_euler
(
grid_grad_x
,
grid_grad_y
,
grid_grad_temp_x
,
grid_grad_temp_y
,
frame
,
lens
,
Nlens
,
nbgridcells_x
,
nbgridcells_y
,
istart
,
jstart
,
&
count
);
}
time
+=
mysecond
();
printf
(
"** %d Euler computations: %f s.
\n
"
,
count
,
time
);
#else
memcpy
(
grid_grad_x
,
grid_grad_temp_x
,
nbgridcells_x
*
nbgridcells_y
*
sizeof
(
double
));
memcpy
(
grid_grad_y
,
grid_grad_temp_y
,
nbgridcells_x
*
nbgridcells_y
*
sizeof
(
double
));
#endif
}
void
gradient_grid_mixed_CPU
(
double
*
grid_grad_x
,
double
*
grid_grad_y
,
const
struct
grid_param
*
frame
,
const
struct
Potential_SOA
*
lens
,
int
nhalos
,
int
nbgridcells_x
,
int
nbgridcells_y
,
int
istart
,
int
jstart
)
{
double
dx
=
(
frame
->
xmax
-
frame
->
xmin
)
/
(
nbgridcells_x
-
1
);
double
dy
=
(
frame
->
ymax
-
frame
->
ymin
)
/
(
nbgridcells_y
-
1
);
//
gradient_grid_general_mixed_CPU
(
grid_grad_x
,
grid_grad_y
,
frame
,
lens
,
nhalos
,
nbgridcells_x
,
nbgridcells_y
,
istart
,
jstart
);
//
}
Event Timeline
Log In to Comment