Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91647464
main.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Nov 13, 01:43
Size
20 KB
Mime Type
text/x-c
Expires
Fri, Nov 15, 01:43 (2 d)
Engine
blob
Format
Raw Data
Handle
22295337
Attached To
rTENSCONT Tensor Contractor
main.cpp
View Options
#include <cassert>
#include <cstdio>
#include <cmath>
#include <iostream>
#include <chrono>
#include <fstream>
#include <sstream>
#include <string>
#include <mpirxx.h>
//#include <gmp-impl.h>
//#define DEBUG_TRACE
#ifndef DEBUG_TRACE
// This optimization allows to compute only the
// 2n+1 independent components but it biases the
// trace check since trace will trivially be forced
// to 0.
#define EP0_COMPUTE_OPTIM
#endif
constexpr
mp_bitcnt_t
default_prec
=
512
;
mpf_class
*
all_ep0_p
;
//unsigned int* multiset_buffer;
//unsigned int* combination_buffer;
inline
size_t
get_ep0_size
(
size_t
p
)
{
size_t
size
=
(
p
+
1
)
*
(
p
+
2
)
/
2
;
return
size
;
}
inline
size_t
get_ep0_idx
(
size_t
p1
,
size_t
p2
,
size_t
p3
)
{
size_t
p
=
p1
+
p2
+
p3
;
size_t
idx
=
(
p3
*
(
2
*
p
+
3
-
p3
))
/
2
+
p2
;
return
idx
;
}
inline
void
get_ep0_coords
(
size_t
p
,
size_t
idx
,
size_t
*
p1
,
size_t
*
p2
,
size_t
*
p3
)
{
size_t
ep0_size
=
get_ep0_size
(
p
);
size_t
reverse_idx
=
ep0_size
-
idx
-
1
;
size_t
A
=
(
size_t
)
std
::
ceil
(
(
-
1.0
+
std
::
sqrt
(
1
+
8
*
(
reverse_idx
+
1
)))
/
2.0
);
*
p3
=
p
+
1
-
A
;
*
p2
=
idx
-
(
ep0_size
-
A
*
(
A
+
1
)
/
2
);
*
p1
=
p
-
*
p3
-
*
p2
;
}
inline
mpf_class
coeff_Np0
(
size_t
p
)
{
// (2p)!/(p!)
mpf_class
p2fac_over_pfac
=
1.0
;
for
(
size_t
fac
=
2
*
p
;
fac
>
p
;
--
fac
)
p2fac_over_pfac
*=
fac
;
// p!
mpf_class
pfac
=
1.0
;
for
(
size_t
fac
=
2
;
fac
<=
p
;
++
fac
)
pfac
*=
fac
;
// 1/2^p
mpf_class
one_over_2p
=
std
::
pow
(
2.0
,
-
(
int
)
p
);
// sqrt( (2p)! / 2^p / (p!)^2 )
mpf_class
result
=
sqrt
(
one_over_2p
*
p2fac_over_pfac
/
pfac
);
return
result
;
}
inline
mpf_class
arrangement_factor
(
size_t
n
,
size_t
m
)
{
assert
(
m
<=
n
/
2
);
size_t
m2
=
n
-
2
*
m
;
// Note: m <= n/2, thus m <= m2
mpf_class
nfac_over_m2fac
=
1.0
;
for
(
size_t
fac
=
n
;
fac
>
m2
;
--
fac
)
nfac_over_m2fac
*=
fac
;
mpf_class
mfac
=
1.0
;
for
(
size_t
fac
=
2
;
fac
<=
m
;
++
fac
)
mfac
*=
fac
;
mpf_class
one_over_2m
=
std
::
pow
(
2.0
,
-
(
int
)
m
);
mpf_class
result
=
one_over_2m
*
nfac_over_m2fac
/
mfac
;
return
result
;
}
mpf_class
compute_ep0
(
size_t
p1
,
size_t
p2
,
size_t
p3
)
{
// Trivial case
if
((
p1
%
2
)
+
(
p2
%
2
)
>
0
)
return
0.0
;
size_t
p
=
p1
+
p2
+
p3
;
size_t
k1
=
p1
/
2
;
size_t
k2
=
p2
/
2
;
size_t
k3
=
p3
/
2
;
mpf_class
result
=
0.0
;
mpf_class
sign
=
(
k1
+
k2
)
%
2
==
0
?
-
1.0
:
1.0
;
for
(
size_t
m3
=
0
;
m3
<=
k3
;
++
m3
)
{
sign
*=
-
1.0
;
mpf_class
p1fac_over_k1fac
=
1.0
;
for
(
size_t
fac
=
p1
;
fac
>
k1
;
--
fac
)
p1fac_over_k1fac
*=
fac
;
mpf_class
p2fac_over_k2fac
=
1.0
;
for
(
size_t
fac
=
p2
;
fac
>
k2
;
--
fac
)
p2fac_over_k2fac
*=
fac
;
assert
(
(
p
+
p3
-
2
*
m3
-
1
)
%
2
==
(
2
*
p
-
1
)
%
2
);
// NOTE: Here the denominator is greater than numerator
mpf_class
one_over_double_fac_ratio
=
1.0
;
for
(
size_t
fac
=
2
*
p
-
1
;
fac
>
p
+
p3
-
2
*
m3
-
1
;
fac
-=
2
)
one_over_double_fac_ratio
*=
fac
;
mpf_class
one_over_2k1k2
=
std
::
pow
(
2.0
,
-
(
int
)(
k1
+
k2
));
result
+=
sign
*
one_over_2k1k2
/
one_over_double_fac_ratio
*
p1fac_over_k1fac
*
p2fac_over_k2fac
*
arrangement_factor
(
p3
,
m3
);
}
result
*=
coeff_Np0
(
p
);
return
result
;
}
#define max(a,b) (((a)<(b))?(b):(a))
void
check_trace_of_ep0
(
size_t
p
,
mpf_class
*
ep0_p
)
{
std
::
cout
<<
"=== Begin Check Trace (p: "
<<
p
<<
") ===
\n
"
;
if
(
p
<
2
)
{
std
::
cout
<<
" - Trace is only valide for tansor of rank p >= 2.
\n
"
;
return
;
}
mpf_class
max_error
=
0.0
;
size_t
n
=
p
-
2
;
size_t
n1
=
n
;
size_t
n2
=
0
;
size_t
n3
=
0
;
do
{
size_t
idx1
=
get_ep0_idx
(
n1
+
2
,
n2
,
n3
);
size_t
idx2
=
get_ep0_idx
(
n1
,
n2
+
2
,
n3
);
size_t
idx3
=
get_ep0_idx
(
n1
,
n2
,
n3
+
2
);
mpf_class
trace
=
ep0_p
[
idx1
]
+
ep0_p
[
idx2
]
+
ep0_p
[
idx3
];
//std::printf("Trace: %g (%g, %g, %g)\n", trace, ep0_p[idx1],ep0_p[idx2],ep0_p[idx3]);
max_error
=
max
(
abs
(
trace
),
max_error
);
n2
=
(
++
n2
)
%
(
n
+
1
-
n3
);
if
(
n2
==
0
)
++
n3
;
n1
=
n
-
n2
-
n3
;
}
while
(
n3
<=
n
);
std
::
cout
<<
" - Max Error: "
<<
max_error
<<
"
\n
"
;
}
inline
size_t
get_ep0_offset_from_p
(
size_t
p
)
{
size_t
offset
=
p
*
(
p
+
1
)
*
(
p
+
2
)
/
6
;
return
offset
;
}
inline
size_t
get_ep0_p_from_offseted_idx
(
size_t
idx
)
{
// NOTE: Exact solution is not precise enough due to
// fp errors. This is why I choosed to go with Newton's root finding
mpf_class
x
=
0.0
;
mpf_class
offset_f
=
-
1.0
;
while
(
abs
(
x
-
offset_f
)
>
1e-4
)
// magic number, have been tested to be enough for p -> 1'000'000
{
offset_f
=
x
;
x
=
(
2
*
x
*
x
*
x
+
3
*
x
*
x
+
6
*
idx
)
/
(
3
*
x
*
x
+
6
*
x
+
2
);
//std::cout << "x = " << x << "\n";
}
size_t
offset
=
(
size_t
)(
offset_f
.
get_d
());
return
offset
;
}
inline
size_t
get_ep0_offset_from_offseted_idx
(
size_t
idx
)
{
return
get_ep0_offset_from_p
(
get_ep0_p_from_offseted_idx
(
idx
));
}
void
fill_ep0
(
size_t
p
,
mpf_class
*
ep0_p
)
{
size_t
p1
=
p
;
size_t
p2
=
0
;
size_t
p3
=
0
;
do
{
size_t
idx
=
get_ep0_idx
(
p1
,
p2
,
p3
);
#ifdef EP0_COMPUTE_OPTIM
if
(
idx
<=
2
*
p
)
// <=> idx+1 < 2p+1
#endif
{
ep0_p
[
idx
]
=
compute_ep0
(
p1
,
p2
,
p3
);
}
#ifdef EP0_COMPUTE_OPTIM
else
#else
if
(
false
)
#endif
{
assert
(
p3
>=
2
);
size_t
idx1
=
get_ep0_idx
(
p1
+
2
,
p2
,
p3
-
2
);
size_t
idx2
=
get_ep0_idx
(
p1
,
p2
+
2
,
p3
-
2
);
ep0_p
[
idx
]
=
-
ep0_p
[
idx1
]
-
ep0_p
[
idx2
];
}
//if(ep0_p[idx] != 0.0)
{
//std::printf("- (%zu, %zu, %zu) is located at pos %zu", p1, p2, p3, idx);
//std::cout << " -> value is " << ep0_p[idx] << "\n";
}
p2
=
(
++
p2
)
%
(
p
+
1
-
p3
);
if
(
p2
==
0
)
++
p3
;
p1
=
p
-
p2
-
p3
;
}
while
(
p3
<=
p
);
}
void
DEBUG_clear_all_ep0
(
size_t
p_max
,
mpf_class
*
all_ep0_p
)
{
size_t
upper_bound
=
get_ep0_offset_from_p
(
p_max
+
1
);
for
(
size_t
i
=
0
;
i
<
upper_bound
;
++
i
)
all_ep0_p
[
i
]
=
-
100.0
;
}
void
DEBUG_dump_all_ep0
(
size_t
p_max
,
mpf_class
*
all_ep0_p
)
{
std
::
cout
<<
" === DUMP ALL ep0 ===
\n
"
;
for
(
size_t
p
=
0
;
p
<=
p_max
;
++
p
)
{
size_t
line_counter
=
0
;
for
(
size_t
i
=
0
;
i
<
get_ep0_size
(
p
);
++
i
)
{
std
::
cout
<<
all_ep0_p
[
get_ep0_offset_from_p
(
p
)
+
i
]
<<
" "
;
++
line_counter
;
if
(
line_counter
%
12
==
0
)
{
std
::
cout
<<
"
\n
"
;
}
}
if
(
p
<
p_max
)
std
::
cout
<<
"
\n
-------- "
;
std
::
cout
<<
"
\n
"
;
}
}
void
fill_all_ep0
(
size_t
p_max
,
mpf_class
*
all_ep0_p
)
{
for
(
size_t
p
=
0
;
p
<=
p_max
;
++
p
)
{
mpf_class
*
ep0_p
=
all_ep0_p
+
get_ep0_offset_from_p
(
p
);
fill_ep0
(
p
,
ep0_p
);
}
}
void
check_trace_of_all_ep0
(
size_t
p_max
,
mpf_class
*
all_ep0_p
)
{
for
(
size_t
p
=
0
;
p
<=
p_max
;
++
p
)
check_trace_of_ep0
(
p
,
all_ep0_p
+
get_ep0_offset_from_p
(
p
));
}
void
dump_mpf_to_buffer
(
mpf_class
*
value_p
,
int
*
prec_p
,
int
*
size_p
,
mp_exp_t
*
exp_p
,
mp_limb_t
*
limbs_p
)
{
// NOTE: This procedure is only valid beacause mpf is guarantied to be of fixed precision
*
prec_p
=
value_p
->
get_mpf_t
()
->
_mp_prec
;
assert
(
value_p
->
get_prec
()
==
default_prec
);
*
size_p
=
value_p
->
get_mpf_t
()
->
_mp_size
;
*
exp_p
=
value_p
->
get_mpf_t
()
->
_mp_exp
;
for
(
size_t
i
=
0
;
i
<
abs
(
*
size_p
);
++
i
)
limbs_p
[
i
]
=
*
(
value_p
->
get_mpf_t
()
->
_mp_d
+
i
);
}
void
undump_mpf_from_buffer
(
mpf_class
*
value_p
,
int
*
prec_p
,
int
*
size_p
,
mp_exp_t
*
exp_p
,
mp_limb_t
*
limbs_p
)
{
// NOTE: This procedure is only valid beacause mpf is guarantied to be of fixed precision
value_p
->
get_mpf_t
()
->
_mp_prec
=
*
prec_p
;
value_p
->
get_mpf_t
()
->
_mp_size
=
*
size_p
;
value_p
->
get_mpf_t
()
->
_mp_exp
=
*
exp_p
;
for
(
size_t
i
=
0
;
i
<
abs
(
*
size_p
);
++
i
)
*
(
value_p
->
get_mpf_t
()
->
_mp_d
+
i
)
=
limbs_p
[
i
];
}
struct
mpf_content_buffer
{
size_t
size
;
// ---
int
*
size_p
;
int
*
prec_p
;
mp_exp_t
*
exp_p
;
mp_limb_t
*
limbs_p
;
};
void
init_mpf_content_buffer
(
mpf_content_buffer
*
buffer
,
size_t
size
)
{
buffer
->
size
=
size
;
buffer
->
size_p
=
new
int
[
size
];
buffer
->
prec_p
=
new
int
[
size
];
buffer
->
exp_p
=
new
mp_exp_t
[
size
];
buffer
->
limbs_p
=
new
mp_limb_t
[
size
*
(
default_prec
+
1
)];
}
void
clear_mpf_content_buffer
(
mpf_content_buffer
*
buffer
)
{
delete
[]
buffer
->
limbs_p
;
delete
[]
buffer
->
exp_p
;
delete
[]
buffer
->
prec_p
;
delete
[]
buffer
->
size_p
;
buffer
->
size
=
0
;
}
void
dump_array_of_mpf_to_buffer
(
mpf_class
*
value_p
,
mpf_content_buffer
*
buffer
,
size_t
size
)
{
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
dump_mpf_to_buffer
(
value_p
+
i
,
buffer
->
prec_p
+
i
,
buffer
->
size_p
+
i
,
buffer
->
exp_p
+
i
,
buffer
->
limbs_p
+
i
*
(
default_prec
+
1
));
}
}
void
undump_array_of_mpf_from_buffer
(
mpf_class
*
value_p
,
mpf_content_buffer
*
buffer
,
size_t
size
)
{
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
undump_mpf_from_buffer
(
value_p
+
i
,
buffer
->
prec_p
+
i
,
buffer
->
size_p
+
i
,
buffer
->
exp_p
+
i
,
buffer
->
limbs_p
+
i
*
(
default_prec
+
1
));
}
}
// Thanks to Martin Broadhurst (http://www.martinbroadhurst.com/combinations-of-a-multiset.html)
unsigned
int
next_multiset_combination
(
const
unsigned
int
*
multiset
,
unsigned
int
*
ar
,
size_t
n
,
size_t
k
)
{
bool
finished
=
false
;
bool
changed
=
false
;
size_t
i
;
for
(
i
=
k
-
1
;
!
finished
&&
!
changed
;
i
--
)
{
if
(
ar
[
i
]
<
multiset
[
i
+
(
n
-
k
)])
{
/* Find the successor */
size_t
j
;
for
(
j
=
0
;
multiset
[
j
]
<=
ar
[
i
];
j
++
);
/* Replace this element with it */
ar
[
i
]
=
multiset
[
j
];
if
(
i
<
k
-
1
)
{
/* Make the elements after it the same as this part of the multiset */
size_t
l
;
for
(
l
=
i
+
1
,
j
=
j
+
1
;
l
<
k
;
l
++
,
j
++
)
{
ar
[
l
]
=
multiset
[
j
];
}
}
changed
=
true
;
}
finished
=
(
i
==
0
);
}
if
(
!
changed
)
{
/* Reset to first combination */
for
(
i
=
0
;
i
<
k
;
i
++
)
{
ar
[
i
]
=
multiset
[
i
];
}
}
return
changed
;
}
unsigned
int
*
create_multiset
(
size_t
n1
,
size_t
n2
,
size_t
n3
,
unsigned
int
*
buffer
)
{
// Buffer is supposed to be big enough!
unsigned
int
*
multiset
=
buffer
;
size_t
i
=
0
;
for
(;
i
<
n1
;
++
i
)
multiset
[
i
]
=
1
;
for
(;
i
<
n1
+
n2
;
++
i
)
multiset
[
i
]
=
2
;
for
(;
i
<
n1
+
n2
+
n3
;
++
i
)
multiset
[
i
]
=
3
;
return
multiset
;
}
unsigned
int
*
create_first_combination_from_multiset
(
unsigned
int
*
multiset
,
size_t
k
,
unsigned
int
*
buffer
)
{
unsigned
int
*
combination
=
buffer
;
for
(
size_t
i
=
0
;
i
<
k
;
++
i
)
combination
[
i
]
=
multiset
[
i
];
return
combination
;
}
void
read_combination_of_multiset
(
unsigned
int
*
combination
,
size_t
p1
,
size_t
p2
,
size_t
p3
,
size_t
*
L1
,
size_t
*
L2
,
size_t
*
L3
,
size_t
*
R1
,
size_t
*
R2
,
size_t
*
R3
)
{
size_t
i
=
0
;
*
L1
=
*
L2
=
*
L3
=
0
;
for
(;
combination
[
i
]
==
1
;
++
i
)
++
(
*
L1
);
*
R1
=
p1
-
*
L1
;
for
(;
combination
[
i
]
==
2
;
++
i
)
++
(
*
L2
);
*
R2
=
p2
-
*
L2
;
for
(;
combination
[
i
]
==
3
;
++
i
)
++
(
*
L3
);
*
R3
=
p3
-
*
L3
;
}
inline
mpf_class
binomial
(
size_t
n
,
size_t
p
)
{
size_t
p1
=
(
p
>
n
-
p
)
?
p
:
n
-
p
;
size_t
p2
=
n
-
p1
;
mpf_class
p2fac
=
1.0
;
for
(
size_t
fac
=
1
;
fac
<=
p2
;
++
fac
)
p2fac
*=
fac
;
mpf_class
result
=
1.0
;
for
(
size_t
fac
=
n
;
fac
>
p1
;
--
fac
)
result
*=
fac
;
result
/=
p2fac
;
return
result
;
}
inline
mpf_class
trinomial
(
size_t
n1
,
size_t
n2
,
size_t
n3
)
{
// This ensure that n1_ is the greatest of the three
size_t
n1_
,
n2_
,
n3_
;
if
(
n1
>=
n2
&&
n1
>=
n3
)
{
n1_
=
n1
;
n2_
=
n2
;
n3_
=
n3
;
}
else
if
(
n2
>=
n1
&&
n2
>=
n3
)
{
n1_
=
n2
;
n2_
=
n1
;
n3_
=
n3
;
}
else
{
n1_
=
n3
;
n2_
=
n1
;
n3_
=
n2
;
}
mpf_class
n2fac
=
1.0
;
mpf_class
n3fac
=
1.0
;
for
(
size_t
fac
=
1
;
fac
<=
n2_
;
++
fac
)
n2fac
*=
fac
;
for
(
size_t
fac
=
1
;
fac
<=
n3_
;
++
fac
)
n3fac
*=
fac
;
mpf_class
multinomial
=
1.0
;
for
(
size_t
fac
=
n1
+
n2
+
n3
;
fac
>
n1_
;
--
fac
)
multinomial
*=
fac
;
multinomial
/=
(
n2fac
*
n3fac
);
return
multinomial
;
}
mpf_class
compute_EP0S
(
size_t
p
,
size_t
n
,
size_t
pp
,
size_t
p1
,
size_t
p2
,
size_t
p3
)
{
assert
(
p1
+
p2
+
p3
==
p
+
pp
-
2
*
n
);
// Compute the symmetric part of (ep0 .n ep'0)
mpf_class
result
=
0.0
;
// Sum over k-combinations:
size_t
k
=
p
-
n
;
if
(
k
==
0
)
return
1.0
;
// NOTE(Sam): This allocation is suboptimal but I had issue with the crapy allocator
// I implemented eralier and it works like that. I'll try to optimize it if needed
// in the future.
unsigned
int
*
multiset
=
new
unsigned
int
[
p1
+
p2
+
p3
];
unsigned
int
*
combination
=
new
unsigned
int
[
k
];
multiset
=
create_multiset
(
p1
,
p2
,
p3
,
multiset
);
combination
=
create_first_combination_from_multiset
(
multiset
,
k
,
combination
);
/*
std::cout << "p1+p2+p3="<<p1+p2+p3<<", k="<<k<<"\n";
for(size_t i = 0; i < p1+p2+p3; ++i)
std::cout << multiset[i] << " ";
std::cout << "\n";
for(size_t i = 0; i < k; ++i)
std::cout << combination[i] << " ";
std::cout << "\n";
std::cout << "---" "\n";
//*/
//std::cout << "---\n";
do
{
/*
for(size_t i = 0; i < k; ++i)
std::cout << combination[i] << " ";
std::cout << "\n";
//*/
size_t
L1
,
L2
,
L3
,
R1
,
R2
,
R3
;
read_combination_of_multiset
(
combination
,
p1
,
p2
,
p3
,
&
L1
,
&
L2
,
&
L3
,
&
R1
,
&
R2
,
&
R3
);
//std::cout << "(" << L1 << ", " << L2 << ", " << L3 << ") - (" << R1 << ", " << R2 << ", " << R3 << ")\n";
mpf_class
binome_1
=
binomial
(
p1
,
L1
);
mpf_class
binome_2
=
binomial
(
p2
,
L2
);
mpf_class
binome_3
=
binomial
(
p3
,
L3
);
// Sum with n1+n2+n3=n
size_t
n1
=
n
;
size_t
n2
=
0
;
size_t
n3
=
0
;
do
{
mpf_class
multinomial
=
trinomial
(
n1
,
n2
,
n3
);
size_t
idx1
=
get_ep0_offset_from_p
(
p
)
+
get_ep0_idx
(
L1
+
n1
,
L2
+
n2
,
L3
+
n3
);
size_t
idx2
=
get_ep0_offset_from_p
(
pp
)
+
get_ep0_idx
(
R1
+
n1
,
R2
+
n2
,
R3
+
n3
);
result
+=
binome_1
*
binome_2
*
binome_3
*
multinomial
*
all_ep0_p
[
idx1
]
*
all_ep0_p
[
idx2
];
n2
=
(
++
n2
)
%
(
n
+
1
-
n3
);
if
(
n2
==
0
)
++
n3
;
n1
=
n
-
n2
-
n3
;
}
while
(
n3
<=
n
);
// End sum with n1+n2+n3=n
}
while
(
next_multiset_combination
(
multiset
,
combination
,
p1
+
p2
+
p3
,
k
));
// End sum over k-combinations
delete
[]
combination
;
delete
[]
multiset
;
// Compute prefactor (p-n)!(pp-n)!/(p+p-2n)!
size_t
upper_left
,
upper_right
;
size_t
lower
=
p
+
pp
-
2
*
n
;
if
(
p
-
n
>
pp
-
n
)
{
upper_left
=
p
-
n
;
upper_right
=
pp
-
n
;
}
else
{
upper_left
=
pp
-
n
;
upper_right
=
p
-
n
;
}
mpf_class
lower_fac_over_upper_left_fac
=
1.0
;
for
(
size_t
fac
=
lower
;
fac
>
upper_left
;
--
fac
)
lower_fac_over_upper_left_fac
*=
fac
;
mpf_class
upper_right_fac
=
1.0
;
for
(
size_t
fac
=
1
;
fac
<=
upper_right
;
++
fac
)
upper_right_fac
*=
fac
;
result
*=
upper_right_fac
/
lower_fac_over_upper_left_fac
;
return
result
;
}
mpf_class
compute_EP0
(
size_t
p
,
size_t
n
,
size_t
pp
,
size_t
p1
,
size_t
p2
,
size_t
p3
)
{
// Compute the traceless and symmetric part of (ep0 .n ep'0)
mpf_class
result
=
0.0
;
size_t
k1
=
p1
/
2
;
size_t
k2
=
p2
/
2
;
size_t
k3
=
p3
/
2
;
for
(
size_t
m1
=
0
;
m1
<=
k1
;
++
m1
)
for
(
size_t
m2
=
0
;
m2
<=
k2
;
++
m2
)
for
(
size_t
m3
=
0
;
m3
<=
k3
;
++
m3
)
{
mpf_class
sign
=
(
(
m1
+
m2
+
m3
)
%
2
==
0
?
1.0
:
-
1.0
);
mpf_class
arr_factor_1
=
arrangement_factor
(
p1
,
m1
);
mpf_class
arr_factor_2
=
arrangement_factor
(
p2
,
m2
);
mpf_class
arr_factor_3
=
arrangement_factor
(
p3
,
m3
);
mpf_class
prefactor
=
sign
*
arr_factor_1
*
arr_factor_2
*
arr_factor_3
;
if
(
(
2
*
(
p
+
pp
-
2
*
n
)
-
2
*
(
m1
+
m2
+
m3
)
-
1
)
%
2
==
(
2
*
(
p
+
pp
-
2
*
n
)
-
1
)
%
2
)
{
mpf_class
one_over_double_fac
=
1.0
;
for
(
size_t
fac
=
(
2
*
(
p
+
pp
-
2
*
n
)
-
1
);
fac
>
(
2
*
(
p
+
pp
-
2
*
n
)
-
2
*
(
m1
+
m2
+
m3
)
-
1
);
--
fac
)
one_over_double_fac
*=
fac
;
prefactor
/=
one_over_double_fac
;
}
else
{
mpf_class
numerator
=
1.0
;
for
(
size_t
fac
=
1
;
fac
<=
(
2
*
(
p
+
pp
-
2
*
n
)
-
2
*
(
m1
+
m2
+
m3
)
-
1
);
++
fac
)
numerator
*=
fac
;
mpf_class
denominator
=
1.0
;
for
(
size_t
fac
=
1
;
fac
<=
(
2
*
(
p
+
pp
-
2
*
n
)
-
1
);
++
fac
)
denominator
*=
fac
;
prefactor
*=
numerator
/
denominator
;
}
// Sum with h1+h2+h3=m
size_t
h1
=
m1
+
m2
+
m3
;
size_t
h2
=
0
;
size_t
h3
=
0
;
do
{
mpf_class
multinomial
=
trinomial
(
h1
,
h2
,
h3
);
result
+=
prefactor
*
multinomial
*
compute_EP0S
(
p
,
n
,
pp
,
p1
-
2
*
(
m1
-
h1
),
p2
-
2
*
(
m2
-
h2
),
p3
-
2
*
(
m3
-
h3
)
);
h2
=
(
++
h2
)
%
(
m1
+
m2
+
m3
+
1
-
h3
);
if
(
h2
==
0
)
++
h3
;
h1
=
m1
+
m2
+
m3
-
h2
-
h3
;
}
while
(
h3
<=
m1
+
m2
+
m3
);
// End sum with h1+h2+h3=m
}
return
result
;
}
mpf_class
compute_full_tensor_contraction
(
size_t
p
,
size_t
n
,
size_t
pp
)
{
size_t
P
=
p
+
pp
-
2
*
n
;
mpf_class
result
=
0.0
;
// Sum with p1+p2+p3=p+pp-2n
size_t
p1
=
P
;
size_t
p2
=
0
;
size_t
p3
=
0
;
do
{
mpf_class
multinomial
=
trinomial
(
p1
,
p2
,
p3
);
size_t
idx1
=
get_ep0_offset_from_p
(
P
)
+
get_ep0_idx
(
p1
,
p2
,
p3
);
result
+=
multinomial
*
all_ep0_p
[
idx1
]
*
compute_EP0
(
p
,
n
,
pp
,
p1
,
p2
,
p3
);
p2
=
(
++
p2
)
%
(
P
+
1
-
p3
);
if
(
p2
==
0
)
++
p3
;
p1
=
P
-
p2
-
p3
;
}
while
(
p3
<=
P
);
// End sum with p1+p2+p3=p+pp-2n
return
result
;
}
int
main
()
{
mpf_set_default_prec
(
default_prec
);
std
::
cout
<<
"Default precision: "
<<
mpf_get_default_prec
()
<<
"
\n
"
;
size_t
p_max
=
10
;
size_t
all_ep0_size
=
get_ep0_offset_from_p
(
p_max
+
1
);
all_ep0_p
=
new
mpf_class
[
all_ep0_size
];
std
::
printf
(
"For all ep0 up to p=%zu we need %zu independent variables.
\n
"
,
p_max
,
all_ep0_size
);
DEBUG_clear_all_ep0
(
p_max
,
all_ep0_p
);
fill_all_ep0
(
p_max
,
all_ep0_p
);
#ifdef DEBUG_TRACE
check_trace_of_all_ep0
(
p_max
,
all_ep0_p
);
#endif
//*
for
(
size_t
i
=
0
;
i
<
all_ep0_size
;
++
i
)
if
(
all_ep0_p
[
i
]
==
-
100.0
)
std
::
cout
<<
"Found "
<<
all_ep0_p
[
i
]
<<
"
\n
"
;
//*/
// NOTE(Sam): This will be indispensable for MPI communication
/*
mpf_content_buffer buffer;
init_mpf_content_buffer(&buffer, all_ep0_size);
dump_array_of_mpf_to_buffer(all_ep0_p, &buffer, all_ep0_size);
DEBUG_dump_all_ep0(p_max, all_ep0_p);
DEBUG_clear_all_ep0(p_max, all_ep0_p);
DEBUG_dump_all_ep0(p_max, all_ep0_p);
undump_array_of_mpf_from_buffer(all_ep0_p, &buffer, all_ep0_size);
DEBUG_dump_all_ep0(p_max, all_ep0_p);
clear_mpf_content_buffer(&buffer);
//*/
//multiset_buffer = new unsigned int[2*p_max];
//combination_buffer = new unsigned int[2*p_max];
//*
mpf_content_buffer
buffer
;
init_mpf_content_buffer
(
&
buffer
,
all_ep0_size
);
mp_exp_t
exp
;
std
::
cout
<<
"0."
<<
compute_full_tensor_contraction
(
1
,
1
,
1
).
get_str
(
exp
)
<<
" x10^("
<<
exp
<<
")
\n
"
;
std
::
cout
<<
"0."
<<
compute_full_tensor_contraction
(
1
,
1
,
1
).
get_str
(
exp
)
<<
" x10^("
<<
exp
<<
")
\n
"
;
std
::
cout
<<
"0."
<<
compute_full_tensor_contraction
(
1
,
1
,
1
).
get_str
(
exp
)
<<
" x10^("
<<
exp
<<
")
\n
"
;
//dump_array_of_mpf_to_buffer(all_ep0_p, &buffer, all_ep0_size);
//DEBUG_dump_all_ep0(p_max, all_ep0_p);
std
::
ifstream
mathematica_csv
(
"tests/contracted_tensors_mathematica.csv"
);
std
::
string
line
;
while
(
std
::
getline
(
mathematica_csv
,
line
))
{
std
::
istringstream
iss
(
line
);
size_t
p
,
n
,
pp
;
char
ignore
;
double
value
,
timing
;
if
(
!
(
iss
>>
p
>>
ignore
>>
n
>>
ignore
>>
pp
>>
ignore
>>
value
>>
ignore
>>
timing
)
)
{
break
;
}
//std::cout << "0." << compute_full_tensor_contraction(1,1,1).get_str(exp) << " x10^("<<exp << ")\n";
auto
t1
=
std
::
chrono
::
high_resolution_clock
::
now
();
mpf_class
computed
=
compute_full_tensor_contraction
(
p
,
n
,
pp
);
//dump_array_of_mpf_to_buffer(all_ep0_p, &buffer, all_ep0_size);
auto
t2
=
std
::
chrono
::
high_resolution_clock
::
now
();
std
::
chrono
::
duration
<
double
,
std
::
milli
>
ms_timing
=
t2
-
t1
;
//std::cout << "0." << compute_full_tensor_contraction(1,1,1).get_str(exp) << " x10^("<<exp << ")\n";
std
::
cout
<<
"("
<<
p
<<
", "
<<
n
<<
", "
<<
pp
<<
") - Error: "
<<
abs
(
computed
-
value
)
// << " --- abs(" << computed << " - " << value << ") ---"
<<
"
\t\t\t
Time: "
<<
ms_timing
.
count
()
<<
"ms (old: "
<<
timing
*
1000
<<
"ms) x"
<<
timing
*
1000
/
ms_timing
.
count
()
<<
"
\n
"
;
if
(
p
>
4
)
break
;
}
mathematica_csv
.
close
();
//DEBUG_dump_all_ep0(p_max, all_ep0_p);
std
::
cout
<<
"0."
<<
compute_full_tensor_contraction
(
1
,
1
,
1
).
get_str
(
exp
)
<<
" x10^("
<<
exp
<<
")
\n
"
;
std
::
cout
<<
"0."
<<
compute_full_tensor_contraction
(
1
,
1
,
1
).
get_str
(
exp
)
<<
" x10^("
<<
exp
<<
")
\n
"
;
std
::
cout
<<
"0."
<<
compute_full_tensor_contraction
(
1
,
1
,
1
).
get_str
(
exp
)
<<
" x10^("
<<
exp
<<
")
\n
"
;
clear_mpf_content_buffer
(
&
buffer
);
//*/
// TODO(Sam): This could go into tests
/*
size_t current_p = 0;
for(size_t offset = 0; current_p <= 1000000; offset += current_p*current_p*current_p/1000+1)
{
current_p = get_ep0_p_from_offseted_idx(offset);
size_t offmin = get_ep0_offset_from_p(current_p);
size_t offmax = get_ep0_offset_from_p(current_p+1);
std::cout << "Verifying " << offmin << " <= " << offset << " < " << offmax << " (p="<<current_p<<")\n";
assert(offmin <= offset && offset < offmax);
}
//*/
// TODO(Sam): This could go into tests
/*
size_t p1 = p;
size_t p2 = 0;
size_t p3 = 0;
do {
std::printf("- (%zu, %zu, %zu) is located at pos %zu\n", p1, p2, p3, get_ep0_idx(p1,p2,p3));
p2 = (++p2)%(p+1-p3);
if(p2 == 0)
++p3;
p1 = p - p2 -p3;
} while(p3 <= p);
std::cout << "---------\n";
for(size_t idx = 0; idx < get_ep0_size(p); ++idx)
{
get_ep0_coords(p, idx, &p1, &p2, &p3);
std::printf("- idx:%zu corresponds to (%zu, %zu, %zu)\n", idx, p1, p2, p3);
}
// End of tests
//*/
/*
for(size_t i = 0; i < 20; ++i)
std::cout << "Coeff for p=" << i << " is " << coeff_Np0(i) << "\n";
//*/
/*
for(size_t n = 0; n < 150; n += 10)
for(size_t m = 0; m <= n/2; m += 5)
std::cout << "[" << n << ", " << m << "] = " << arrangement_factor(n, m) << "\n";
//*/
//delete[] combination_buffer;
//delete[] multiset_buffer;
//delete[] ep0_p;
delete
[]
all_ep0_p
;
return
0
;
}
Event Timeline
Log In to Comment