Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91408309
simd_math.h
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Nov 10, 20:05
Size
2 KB
Mime Type
text/x-c
Expires
Tue, Nov 12, 20:05 (2 d)
Engine
blob
Format
Raw Data
Handle
22202821
Attached To
R1448 Lenstool-HPC
simd_math.h
View Options
// Include guard. The macro tested here must be the same one that is defined
// on the next line; otherwise the guard never triggers and a second inclusion
// of this header redefines every function in it.
#ifndef SIMD_MATH_
#define SIMD_MATH_
//
#include <immintrin.h>
#ifdef __INTEL_COMPILER
// Element-wise arithmetic operators on __m256d (four packed doubles).
// Each operator forwards to the matching AVX packed-double intrinsic.
// They are only compiled when __INTEL_COMPILER is defined.
// NOTE(review): presumably other compilers (GCC/Clang) already accept these
// operators on vector types natively -- confirm for every supported toolchain.

// a + b, lane by lane.
inline __m256d operator + (__m256d a, __m256d b)
{
    return _mm256_add_pd(a, b);
}

// a - b, lane by lane.
inline __m256d operator - (__m256d a, __m256d b)
{
    return _mm256_sub_pd(a, b);
}

// a * b, lane by lane.
inline __m256d operator * (__m256d a, __m256d b)
{
    return _mm256_mul_pd(a, b);
}

// a / b, lane by lane.
inline __m256d operator / (__m256d a, __m256d b)
{
    return _mm256_div_pd(a, b);
}
#endif
//
// Fast approximate reciprocal 1/d of four packed doubles.
//
// The input is narrowed to single precision, passed through the hardware
// reciprocal estimate (_mm_rcp_ps) and widened back to double, so the result
// carries only the accuracy of that estimate and d must be representable as
// a float. No refinement step is applied; see RCP_1NR / RCP_2NR for that.
inline __m256d RCP(const __m256d d)
{
    return _mm256_cvtps_pd(_mm_rcp_ps(_mm256_cvtpd_ps(d)));
}
//
//
//
// Approximate reciprocal 1/d of four packed doubles, refined once.
//
// Starts from the single-precision hardware estimate (_mm_rcp_ps) of d
// narrowed to float, then applies one Newton-Raphson step for f(x) = 1/x - d:
//     x1 = 2*x0 - d*x0*x0
// The step is evaluated in double precision on the original d.
inline __m256d RCP_1NR(const __m256d d)
{
    // Single-precision seed, widened back to double.
    const __m256d seed = _mm256_cvtps_pd(_mm_rcp_ps(_mm256_cvtpd_ps(d)));

    // One refinement step (2*seed written as seed + seed).
    return seed + seed - d*seed*seed;
}
//
//
//
// Approximate reciprocal 1/d of four packed doubles, refined twice.
//
// Starts from the single-precision hardware estimate (_mm_rcp_ps) of d
// narrowed to float, then applies two Newton-Raphson steps for
// f(x) = 1/x - d, each of the form
//     x_next = 2*x - d*x*x
// evaluated in double precision on the original d.
inline __m256d RCP_2NR(const __m256d d)
{
    // Single-precision seed, widened back to double.
    __m256d est = _mm256_cvtps_pd(_mm_rcp_ps(_mm256_cvtpd_ps(d)));

    // Two identical refinement passes.
    for (int pass = 0; pass < 2; ++pass) {
        est = est + est - d*est*est;
    }
    return est;
}
// Single-precision square root of four packed doubles.
//
// The input is narrowed to float, the square root is taken with
// _mm_sqrt_ps, and the result is widened back to double. The result
// therefore has float accuracy only, and d must be representable as a
// float. See SQRT_1NR / SQRT_2NR for refined variants.
inline __m256d SQRT(const __m256d d)
{
    return _mm256_cvtps_pd(_mm_sqrt_ps(_mm256_cvtpd_ps(d)));
}
//
//
//
// Approximate square root of four packed doubles, refined once.
//
// Starts from the single-precision square root of d (narrowed to float) and
// applies one Newton-Raphson style correction
//     x1 = 0.5 * x0 * (3 - x0*x0 * a),   a = RCP_1NR(d) ~ 1/d
// which avoids any division. Accuracy is bounded by that of RCP_1NR.
//
// d must be representable as a float. Negative or NaN lanes yield NaN.
// Lanes where d == 0 return 0: without special handling the reciprocal of
// zero is NaN (inf + inf - 0*inf*inf), which would turn sqrt(0) into NaN and
// disagree with SQRT(0) == 0.
inline __m256d SQRT_1NR(const __m256d d)
{
    const __m256d half  = _mm256_set1_pd(0.5);
    const __m256d three = _mm256_set1_pd(3.);

    // Single-precision seed, widened back to double.
    __m256d x0 = _mm256_cvtps_pd(_mm_sqrt_ps(_mm256_cvtpd_ps(d)));

    // Refined reciprocal of the input, used in place of a division by d.
    const __m256d a = RCP_1NR(d);

    x0 = half*x0*(three - x0*x0*a);

    // All-ones where d != 0 (or d is NaN, so NaN still propagates),
    // all-zeros where d == 0; AND-ing forces the zero lanes to +0.0.
    const __m256d nonzero = _mm256_cmp_pd(d, _mm256_setzero_pd(), _CMP_NEQ_UQ);
    return _mm256_and_pd(x0, nonzero);
}
//
//
//
// Approximate square root of four packed doubles, refined twice.
//
// Starts from the single-precision square root of d (narrowed to float) and
// applies two Newton-Raphson style corrections
//     x_next = 0.5 * x * (3 - x*x * a),   a = RCP_2NR(d) ~ 1/d
// which avoids any division. Accuracy is bounded by that of RCP_2NR.
//
// d must be representable as a float. Negative or NaN lanes yield NaN.
// Lanes where d == 0 return 0: without special handling the reciprocal of
// zero is NaN (inf + inf - 0*inf*inf), which would turn sqrt(0) into NaN and
// disagree with SQRT(0) == 0.
inline __m256d SQRT_2NR(const __m256d d)
{
    const __m256d half  = _mm256_set1_pd(0.5);
    const __m256d three = _mm256_set1_pd(3.);

    // Single-precision seed, widened back to double.
    __m256d x0 = _mm256_cvtps_pd(_mm_sqrt_ps(_mm256_cvtpd_ps(d)));

    // Twice-refined reciprocal of the input, used in place of a division by d.
    const __m256d a = RCP_2NR(d);

    x0 = half*x0*(three - x0*x0*a);
    x0 = half*x0*(three - x0*x0*a);

    // All-ones where d != 0 (or d is NaN, so NaN still propagates),
    // all-zeros where d == 0; AND-ing forces the zero lanes to +0.0.
    const __m256d nonzero = _mm256_cmp_pd(d, _mm256_setzero_pd(), _CMP_NEQ_UQ);
    return _mm256_and_pd(x0, nonzero);
}
#endif
Event Timeline
Log In to Comment