diff --git a/main.cpp b/main.cpp
index a5505a6..bc6715e 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,802 +1,802 @@
 #include <cassert>
 #include <cstdio>
 #include <cmath>
 #include <iostream>
 #include <chrono>
 
 #include <fstream>
 #include <sstream>
 #include <string>
 
 #include <mpirxx.h>
 //#include <gmp-impl.h>
 
 //#define DEBUG_TRACE
 #ifndef DEBUG_TRACE
 // This optimization allows to compute only the
 // 2n+1 independent components but it biases the
 // trace check since trace will trivially be forced
 // to 0.
 #define EP0_COMPUTE_OPTIM
 #endif
 
 constexpr mp_bitcnt_t default_prec = 512;
 
 mpf_class* all_ep0_p;
 //unsigned int* multiset_buffer;
 //unsigned int* combination_buffer;
 
 inline size_t get_ep0_size(size_t p)
 {
 	size_t size = (p+1)*(p+2)/2;
 	return size;
 }
 
 inline size_t get_ep0_idx(size_t p1, size_t p2, size_t p3)
 {
 	size_t p = p1+p2+p3;
 	size_t idx = (p3*(2*p+3-p3))/2+p2;
 	return idx;
 }
 
 inline void get_ep0_coords(size_t p, size_t idx, size_t* p1, size_t* p2, size_t* p3)
 {
 	size_t ep0_size = get_ep0_size(p);
 	size_t reverse_idx = ep0_size - idx - 1;
 	size_t A = (size_t)std::ceil( (-1.0 + std::sqrt(1 + 8*(reverse_idx+1)))/2.0 );
 	*p3 = p + 1 - A;
 	*p2 = idx - (ep0_size - A*(A+1)/2 );
 	*p1 = p - *p3 - *p2;
 }
 
 inline mpf_class coeff_Np0(size_t p)
 {
 	// (2p)!/(p!)
 	mpf_class p2fac_over_pfac = 1.0;
 	for(size_t fac = 2*p; fac > p; --fac)
 		p2fac_over_pfac *= fac;
 
 	// p!
 	mpf_class pfac = 1.0;
 	for(size_t fac = 2; fac <= p; ++fac)
 		pfac *= fac;
 
 	// 1/2^p
 	mpf_class one_over_2p = std::pow(2.0, -(int)p);
 
 	// sqrt( (2p)! / 2^p / (p!)^2 )
 	mpf_class result = sqrt(one_over_2p * p2fac_over_pfac / pfac);
 		
 	return result;
 }
 
 inline mpf_class arrangement_factor(size_t n, size_t m)
 {
 	assert(m <= n/2);
 	
 	size_t m2 = n - 2*m;
 
 	// Note: m <= n/2, thus m <= m2
 	mpf_class nfac_over_m2fac = 1.0;
 	for(size_t fac = n; fac > m2; --fac)
 		nfac_over_m2fac *= fac;
 
 	mpf_class mfac = 1.0;
 	for(size_t fac = 2; fac <= m; ++fac)
 		mfac *= fac;
 
 	mpf_class one_over_2m = std::pow(2.0, -(int)m);
 
 	mpf_class result = one_over_2m * nfac_over_m2fac / mfac;
 
 	return result;
 }
 
 mpf_class compute_ep0(size_t p1, size_t p2, size_t p3)
 {
 	// Trivial case
 	if((p1%2) +  (p2%2) > 0)
 		return 0.0;
 
 	size_t p = p1+p2+p3;
 	size_t k1 = p1/2;
 	size_t k2 = p2/2;
 	size_t k3 = p3/2;
 	mpf_class result = 0.0;
 
 	mpf_class sign = (k1+k2) % 2 == 0 ? -1.0 : 1.0;
 	for(size_t m3 = 0; m3 <= k3; ++m3)
 	{
 		sign *= -1.0;
 		
 		mpf_class p1fac_over_k1fac = 1.0;
 		for(size_t fac = p1; fac > k1; --fac)
 			p1fac_over_k1fac *= fac;
 
 		mpf_class p2fac_over_k2fac = 1.0;
 		for(size_t fac = p2; fac > k2; --fac)
 			p2fac_over_k2fac *= fac;
 
 		assert( (p + p3 -2*m3 -1) % 2 == (2*p-1) % 2 );
 		// NOTE: Here the denominator is greater than numerator
 		mpf_class one_over_double_fac_ratio = 1.0;
 		for(size_t fac = 2*p-1; fac > p + p3 -2*m3 -1; fac -= 2 )
 			one_over_double_fac_ratio *= fac;
 
 		mpf_class one_over_2k1k2 = std::pow(2.0, -(int)(k1+k2));
 
 		result += sign * one_over_2k1k2 / one_over_double_fac_ratio
 			* p1fac_over_k1fac * p2fac_over_k2fac * arrangement_factor(p3, m3);
 	}
 
 	result *= coeff_Np0(p);
 	
 	return result;
 }
 
 #define max(a,b) (((a)<(b))?(b):(a))
 
 void check_trace_of_ep0(size_t p, mpf_class* ep0_p)
 {
 	std::cout << "=== Begin Check Trace (p: " << p << ") ===\n";
 
 	if(p < 2)
 	{
 		std::cout << " - Trace is only valide for tansor of rank p >= 2.\n";
 		return;
 	}
 
 	mpf_class max_error = 0.0;
 	
 	size_t n = p-2;
 	
 	size_t n1 = n;
 	size_t n2 = 0;
 	size_t n3 = 0;
 	do {
 		size_t idx1 = get_ep0_idx(n1+2,n2,n3);
 		size_t idx2 = get_ep0_idx(n1,n2+2,n3);
 		size_t idx3 = get_ep0_idx(n1,n2,n3+2);
 
 		mpf_class trace = ep0_p[idx1]+ep0_p[idx2]+ep0_p[idx3];
 		//std::printf("Trace: %g (%g, %g, %g)\n", trace, ep0_p[idx1],ep0_p[idx2],ep0_p[idx3]);
 
 		max_error = max(abs(trace), max_error);
-		
-		n2 = (++n2)%(n+1-n3);
+
+		n2 = (n2+1)%(n+1-n3);
 		if(n2 == 0)
 			++n3;
 		n1 = n - n2 - n3;
 	} while(n3 <= n);
 
 	std::cout << " - Max Error: " << max_error << "\n";
 }
 
 inline size_t get_ep0_offset_from_p(size_t p)
 {
 	size_t offset = p*(p+1)*(p+2)/6;
 	return offset;
 }
 
 inline size_t get_ep0_p_from_offseted_idx(size_t idx)
 {
 	// NOTE: Exact solution is not precise enough due to
 	// fp errors. This is why I choosed to go with Newton's root finding
 	
 	mpf_class x = 0.0;
 	mpf_class offset_f = -1.0;
 	while(abs(x-offset_f)>1e-4) // magic number, have been tested to be enough for p -> 1'000'000
 	{
 		offset_f = x;
 		x = (2*x*x*x + 3*x*x + 6*idx) / (3*x*x + 6*x + 2);
 		//std::cout << "x = " << x << "\n";
 	}
 
 	size_t offset = (size_t)(offset_f.get_d());
 	return offset;
 }
 
 inline size_t get_ep0_offset_from_offseted_idx(size_t idx)
 {
 	return get_ep0_offset_from_p(get_ep0_p_from_offseted_idx(idx));
 }
 
 void fill_ep0(size_t p, mpf_class* ep0_p)
 {
 	size_t p1 = p;
 	size_t p2 = 0;
 	size_t p3 = 0;
 	do {
 		size_t idx = get_ep0_idx(p1,p2,p3);
 
 #ifdef EP0_COMPUTE_OPTIM
 		if(idx <= 2*p) // <=> idx+1 < 2p+1
 #endif
 		{
 			ep0_p[idx] = compute_ep0(p1, p2, p3);
 		}
 #ifdef EP0_COMPUTE_OPTIM
 		else
 #else
 		if(false)
 #endif
 		{
 			assert(p3 >= 2);
 			size_t idx1 = get_ep0_idx(p1+2,p2,p3-2);
 			size_t idx2 = get_ep0_idx(p1,p2+2,p3-2);
 			ep0_p[idx] = - ep0_p[idx1] - ep0_p[idx2];
 		}
 
 		//if(ep0_p[idx] != 0.0)
 		{
 			//std::printf("- (%zu, %zu, %zu) is located at pos %zu", p1, p2, p3, idx);
 			//std::cout << " -> value is " << ep0_p[idx] << "\n";
 		}
 		
-		p2 = (++p2)%(p+1-p3);
+		p2 = (p2+1)%(p+1-p3);
 		if(p2 == 0)
 			++p3;
 		p1 = p - p2 - p3;
 	} while(p3 <= p);
 
 }
 
 void DEBUG_clear_all_ep0(size_t p_max, mpf_class* all_ep0_p)
 {
 	size_t upper_bound = get_ep0_offset_from_p(p_max+1);
 	for(size_t i = 0; i < upper_bound; ++i)
 		all_ep0_p[i] = -100.0;
 }
 
 void DEBUG_dump_all_ep0(size_t p_max, mpf_class* all_ep0_p)
 {
 	std::cout << " === DUMP ALL ep0 === \n";
 	for(size_t p = 0; p <= p_max; ++p)
 	{
 		size_t line_counter = 0;
 		for(size_t i = 0; i < get_ep0_size(p); ++i)
 		{
 			std::cout << all_ep0_p[get_ep0_offset_from_p(p)+i] << " ";
 			++line_counter;
 			if(line_counter % 12 == 0)
 			{
 				std::cout << "\n";
 			}
 		}
 		if(p < p_max)
 			std::cout << "\n -------- ";
 		std::cout << "\n";
 	}
 }
 
 void fill_all_ep0(size_t p_max, mpf_class* all_ep0_p)
 {
 	for(size_t p = 0; p <= p_max; ++p)
 	{
 		mpf_class* ep0_p = all_ep0_p + get_ep0_offset_from_p(p);
 		fill_ep0(p, ep0_p);
 	}
 }
 
 void check_trace_of_all_ep0(size_t p_max, mpf_class* all_ep0_p)
 {
 	for(size_t p = 0; p <= p_max; ++p)
 		check_trace_of_ep0(p, all_ep0_p+get_ep0_offset_from_p(p));
 }
 
 void dump_mpf_to_buffer(mpf_class* value_p, int* prec_p, int* size_p, mp_exp_t* exp_p, mp_limb_t* limbs_p)
 {
 	// NOTE: This procedure is only valid beacause mpf is guarantied to be of fixed precision
 	*prec_p = value_p->get_mpf_t()->_mp_prec;
 	assert(value_p->get_prec() == default_prec);
 	*size_p = value_p->get_mpf_t()->_mp_size;
 	*exp_p = value_p->get_mpf_t()->_mp_exp;
-	for(size_t i = 0; i < abs(*size_p); ++i)
+	for(size_t i = 0; i < (size_t)abs(*size_p); ++i)
 		limbs_p[i] = *(value_p->get_mpf_t()->_mp_d+i);
 }
 
 void undump_mpf_from_buffer(mpf_class* value_p, int* prec_p, int* size_p, mp_exp_t* exp_p, mp_limb_t* limbs_p)
 {
 	// NOTE: This procedure is only valid beacause mpf is guarantied to be of fixed precision	
 	value_p->get_mpf_t()->_mp_prec = *prec_p;
 	value_p->get_mpf_t()->_mp_size = *size_p;
 	value_p->get_mpf_t()->_mp_exp = *exp_p;
-	for(size_t i = 0; i < abs(*size_p); ++i)
+	for(size_t i = 0; i < (size_t)abs(*size_p); ++i)
 		*(value_p->get_mpf_t()->_mp_d+i) = limbs_p[i];
 }
 
 struct mpf_content_buffer {
 	size_t size;
 	// ---
 	int* size_p;
 	int* prec_p;
 	mp_exp_t* exp_p;
 	mp_limb_t* limbs_p;
 };
 
 void init_mpf_content_buffer(mpf_content_buffer* buffer, size_t size)
 {
 	buffer->size = size;
 	buffer->size_p = new int[size];
 	buffer->prec_p = new int[size];
 	buffer->exp_p = new mp_exp_t[size];
 	buffer->limbs_p = new mp_limb_t[size*(default_prec + 1)];
 }
 
 void clear_mpf_content_buffer(mpf_content_buffer* buffer)
 {
 	delete[] buffer->limbs_p;
 	delete[] buffer->exp_p;
 	delete[] buffer->prec_p;
 	delete[] buffer->size_p;
 	buffer->size = 0;
 }
 
 void dump_array_of_mpf_to_buffer(mpf_class* value_p, mpf_content_buffer* buffer, size_t size)
 {
 	for(size_t i = 0; i < size; ++i)
 	{
 		dump_mpf_to_buffer(value_p+i, buffer->prec_p+i,
 						   buffer->size_p+i, buffer->exp_p+i,
 						   buffer->limbs_p + i*(default_prec+1));
 	}
 }
 
 void undump_array_of_mpf_from_buffer(mpf_class* value_p, mpf_content_buffer* buffer, size_t size)
 {
 	for(size_t i = 0; i < size; ++i)
 	{
 		undump_mpf_from_buffer(value_p+i, buffer->prec_p+i,
 							   buffer->size_p+i, buffer->exp_p+i,
 							   buffer->limbs_p + i*(default_prec+1));
 	}
 }
 // Thanks to Martin Broadhurst (http://www.martinbroadhurst.com/combinations-of-a-multiset.html)
 unsigned int next_multiset_combination(const unsigned int *multiset, unsigned int *ar,
 									   size_t n, size_t k)
 {
     bool finished = false;
     bool changed = false;
     size_t i;
  
     for (i = k - 1; !finished && !changed; i--) {
         if (ar[i] < multiset[i + (n - k)]) {
             /* Find the successor */
             size_t j;
             for (j = 0; multiset[j] <= ar[i]; j++);
             /* Replace this element with it */
             ar[i] = multiset[j];
             if (i < k - 1) {
                 /* Make the elements after it the same as this part of the multiset */
                 size_t l;
                 for (l = i + 1, j = j + 1; l < k; l++, j++) {
                     ar[l] = multiset[j];
                 }
             }
             changed = true;
         }
         finished = (i == 0);
     }
     if (!changed) {
         /* Reset to first combination */
         for (i = 0; i < k; i++) {
             ar[i] = multiset[i];
         }
     }
     return changed;
 }
 
 unsigned int* create_multiset(size_t n1, size_t n2, size_t n3, unsigned int* buffer)
 {
 	// Buffer is supposed to be big enough!
 	
 	unsigned int* multiset = buffer;
 
 	size_t i = 0;
 	for(;i < n1; ++i)
 		multiset[i] = 1;
 	for(;i < n1+n2; ++i)
 		multiset[i] = 2;
 	for(;i < n1+n2+n3; ++i)
 		multiset[i] = 3;
 	
 	return multiset;
 }
 
 unsigned int* create_first_combination_from_multiset(unsigned int* multiset, size_t k, unsigned int* buffer)
 {
 	unsigned int* combination = buffer;
 	for(size_t i = 0; i < k; ++i)
 		combination[i] = multiset[i];
 	return combination;
 }
 
 void read_combination_of_multiset(unsigned int* combination, size_t p1, size_t p2, size_t p3,
 								  size_t* L1, size_t* L2, size_t* L3,
 								  size_t* R1, size_t* R2, size_t* R3)
 {
 	size_t i = 0;
 	*L1 = *L2 = *L3 = 0;
-	for(;combination[i]==1;++i) ++(*L1); *R1 = p1 - *L1;
-	for(;combination[i]==2;++i) ++(*L2); *R2 = p2 - *L2;
-	for(;combination[i]==3;++i) ++(*L3); *R3 = p3 - *L3;	
+	for(;combination[i]==1;++i){ ++(*L1); } *R1 = p1 - *L1;
+	for(;combination[i]==2;++i){ ++(*L2); } *R2 = p2 - *L2;
+	for(;combination[i]==3;++i){ ++(*L3); } *R3 = p3 - *L3;	
 }
 
 inline mpf_class binomial(size_t n, size_t p)
 {
 	size_t p1 = (p > n-p) ? p : n-p;
 	size_t p2 = n-p1;
 
 	mpf_class p2fac = 1.0;
 	for(size_t fac = 1; fac <= p2; ++fac) p2fac *= fac;
 	
 	mpf_class result = 1.0;
 	for(size_t fac = n; fac > p1; --fac) result *= fac;
 
 	result /= p2fac;
 
 	return result;
 }
 
 inline mpf_class trinomial(size_t n1, size_t n2, size_t n3)
 {
 	// This ensure that n1_ is the greatest of the three
 	size_t n1_, n2_, n3_;
 	if(n1 >= n2 && n1 >= n3) { n1_ = n1; n2_ = n2; n3_ = n3; }
 	else if(n2 >= n1 && n2 >= n3) { n1_ = n2; n2_ = n1; n3_ = n3; }
 	else { n1_ = n3; n2_ = n1; n3_ = n2; }
 
 	mpf_class n2fac = 1.0;
 	mpf_class n3fac = 1.0;
 	for(size_t fac = 1; fac <= n2_; ++fac) n2fac *= fac;
 	for(size_t fac = 1; fac <= n3_; ++fac) n3fac *= fac;
 
 	mpf_class multinomial = 1.0;
 	for(size_t fac = n1+n2+n3; fac > n1_; --fac)
 		multinomial *= fac;
 	multinomial /= (n2fac*n3fac);
 
 	return multinomial;
 }
 
 mpf_class compute_EP0S(size_t p, size_t n, size_t pp,
 					  size_t p1, size_t p2, size_t p3)
 {
 	assert(p1+p2+p3 == p + pp - 2*n);
 	
 	// Compute the symmetric part of (ep0 .n ep'0)
 	mpf_class result = 0.0;
 
 	// Sum over k-combinations:
 	size_t k = p-n;
 
 	if(k == 0) return 1.0;
 
 	// NOTE(Sam): This allocation is suboptimal but I had issue with the crapy allocator
 	// I implemented eralier and it works like that. I'll try to optimize it if needed
 	// in the future.
 	unsigned int* multiset = new unsigned int[p1+p2+p3];
 	unsigned int* combination = new unsigned int[k];
 	multiset = create_multiset(p1,p2,p3, multiset);
     combination = create_first_combination_from_multiset(multiset, k, combination);
 
 	/*
 	std::cout << "p1+p2+p3="<<p1+p2+p3<<", k="<<k<<"\n";
 	for(size_t i = 0; i < p1+p2+p3; ++i)
 		std::cout << multiset[i] << " ";
 	std::cout << "\n";
 	for(size_t i = 0; i < k; ++i)
 		std::cout << combination[i] << " ";
 	std::cout << "\n";
 	std::cout << "---" "\n";
 	//*/
 
 	//std::cout << "---\n";
 	do {
 		/*
 		for(size_t i = 0; i < k; ++i)
 			std::cout << combination[i] << " ";
 		std::cout << "\n";
 		//*/
 		
 		size_t L1, L2, L3, R1, R2, R3;
 		read_combination_of_multiset(combination, p1, p2, p3, &L1, &L2, &L3, &R1, &R2, &R3);
 		//std::cout << "(" << L1 << ", " << L2 << ", " << L3 << ") - (" << R1 << ", " << R2 << ", " << R3 << ")\n";
 		
 		mpf_class binome_1 = binomial(p1, L1);
 		mpf_class binome_2 = binomial(p2, L2);
 		mpf_class binome_3 = binomial(p3, L3);
 
 		// Sum with n1+n2+n3=n
 		size_t n1 = n;
 		size_t n2 = 0;
 		size_t n3 = 0;
 		do {
 			mpf_class multinomial = trinomial(n1,n2,n3);
 			
 			size_t idx1 = get_ep0_offset_from_p(p) + get_ep0_idx(L1+n1, L2+n2, L3+n3);
 			size_t idx2 = get_ep0_offset_from_p(pp) + get_ep0_idx(R1+n1, R2+n2, R3+n3);
 
 			result += binome_1*binome_2*binome_3*multinomial*all_ep0_p[idx1]*all_ep0_p[idx2];
 		
-			n2 = (++n2)%(n+1-n3);
+			n2 = (n2+1)%(n+1-n3);
 			if(n2 == 0)
 				++n3;
 			n1 = n - n2 - n3;
 		} while(n3 <= n);
 		// End sum with n1+n2+n3=n
 		
     } while (next_multiset_combination(multiset, combination, p1+p2+p3, k));
 	// End sum over k-combinations
 
 	delete[] combination;
 	delete[] multiset;
 	
 	// Compute prefactor (p-n)!(pp-n)!/(p+p-2n)!
 	size_t upper_left, upper_right;
 	size_t lower = p+pp-2*n;
 	if(p-n > pp-n)
 	{
 		upper_left = p-n;
 		upper_right = pp-n;
 	}
 	else
 	{
 		upper_left = pp-n;
 		upper_right = p-n;
 	}
 
 	mpf_class lower_fac_over_upper_left_fac = 1.0;
 	for(size_t fac = lower; fac > upper_left; --fac)
 		lower_fac_over_upper_left_fac *= fac;
 
 	mpf_class upper_right_fac = 1.0;
 	for(size_t fac = 1; fac <= upper_right; ++fac)
 		upper_right_fac *= fac;
 
 	result *= upper_right_fac / lower_fac_over_upper_left_fac;
 	
 	return result;
 }
 
 mpf_class compute_EP0(size_t p, size_t n, size_t pp,
 					  size_t p1, size_t p2, size_t p3)
 {
 	// Compute the traceless and symmetric part of (ep0 .n ep'0)
 	mpf_class result = 0.0;
 
 	size_t k1 = p1/2;
 	size_t k2 = p2/2;
 	size_t k3 = p3/2;
 
 	for(size_t m1 = 0; m1 <= k1; ++m1)
 		for(size_t m2 = 0; m2 <= k2; ++m2)
 			for(size_t m3 = 0; m3 <= k3; ++m3)
 			{
 				mpf_class sign = ( (m1+m2+m3)%2 == 0 ? 1.0 : -1.0 );
 
 				mpf_class arr_factor_1 = arrangement_factor(p1, m1);
 				mpf_class arr_factor_2 = arrangement_factor(p2, m2);
 				mpf_class arr_factor_3 = arrangement_factor(p3, m3);
 
 				mpf_class prefactor = sign * arr_factor_1 * arr_factor_2 * arr_factor_3;
 
 				if( (2*(p+pp-2*n) - 2*(m1+m2+m3)-1) % 2 == (2*(p+pp-2*n)-1) % 2)
 				{
 					mpf_class one_over_double_fac = 1.0;
 					for(size_t fac = (2*(p+pp-2*n)-1); fac > (2*(p+pp-2*n)-2*(m1+m2+m3)-1); --fac)
 						one_over_double_fac *= fac;
 					prefactor /= one_over_double_fac;
 				}
 				else
 				{
 					mpf_class numerator = 1.0;
 					for(size_t fac = 1; fac <= (2*(p+pp-2*n)-2*(m1+m2+m3)-1); ++fac) numerator *= fac;
 					
 					mpf_class denominator = 1.0;
 					for(size_t fac = 1; fac <= (2*(p+pp-2*n)-1); ++fac) denominator *= fac;
 
 					prefactor *= numerator/denominator;
 				}
 				
 				// Sum with h1+h2+h3=m
 				size_t h1 = m1+m2+m3;
 				size_t h2 = 0;
 				size_t h3 = 0;
 				do {
 					
 					mpf_class multinomial = trinomial(h1,h2,h3);
 
 					result += prefactor * multinomial
 						* compute_EP0S(p,n,pp, p1 - 2*(m1-h1), p2 - 2*(m2-h2), p3 - 2*(m3-h3) );
 					
-					h2 = (++h2)%(m1+m2+m3+1-h3);
+					h2 = (h2+1)%(m1+m2+m3+1-h3);
 					if(h2 == 0)
 						++h3;
 					h1 = m1+m2+m3 - h2 - h3;
 				} while(h3 <= m1+m2+m3);
 				// End sum with h1+h2+h3=m
 				
 			}
 	
 	return result;
 }
 
 mpf_class compute_full_tensor_contraction(size_t p, size_t n, size_t pp)
 {
 	size_t P = p+pp-2*n;
 	mpf_class result = 0.0;
 
 	// Sum with p1+p2+p3=p+pp-2n
 	size_t p1 = P;
 	size_t p2 = 0;
 	size_t p3 = 0;
 	do {
 
 		mpf_class multinomial = trinomial(p1,p2,p3);
 
 		size_t idx1 = get_ep0_offset_from_p(P) + get_ep0_idx(p1,p2,p3);
 		
 		result += multinomial*all_ep0_p[idx1]*compute_EP0(p,n,pp, p1,p2,p3);
 		
-		p2 = (++p2)%(P+1-p3);
+		p2 = (p2+1)%(P+1-p3);
 		if(p2 == 0)
 			++p3;
 		p1 = P - p2 - p3;
 	} while(p3 <= P);
 	// End sum with p1+p2+p3=p+pp-2n
 
 	return result;
 }
 
 int main()
 {
 	mpf_set_default_prec(default_prec);
 	std::cout << "Default precision: " << mpf_get_default_prec() << "\n";
 
 	size_t p_max = 10;
 	size_t all_ep0_size = get_ep0_offset_from_p(p_max+1);
 	all_ep0_p = new mpf_class[all_ep0_size];
 	std::printf("For all ep0 up to p=%zu we need %zu independent variables.\n", p_max, all_ep0_size);
 
 	DEBUG_clear_all_ep0(p_max, all_ep0_p);
 
 	fill_all_ep0(p_max, all_ep0_p);
 
 #ifdef DEBUG_TRACE		
 	check_trace_of_all_ep0(p_max, all_ep0_p);
 #endif
 	
 	//*
 	for(size_t i = 0; i < all_ep0_size; ++i)
 		if(all_ep0_p[i] == -100.0)
 			std::cout << "Found " << all_ep0_p[i] << "\n";
 	//*/
 	
 	// NOTE(Sam): This will be indispensable for MPI communication
 	/*
 	mpf_content_buffer buffer;
 	init_mpf_content_buffer(&buffer, all_ep0_size);
 	
 	dump_array_of_mpf_to_buffer(all_ep0_p, &buffer, all_ep0_size);
 	DEBUG_dump_all_ep0(p_max, all_ep0_p);
 
 	DEBUG_clear_all_ep0(p_max, all_ep0_p);
 	DEBUG_dump_all_ep0(p_max, all_ep0_p);
 
 	undump_array_of_mpf_from_buffer(all_ep0_p, &buffer, all_ep0_size);
 	DEBUG_dump_all_ep0(p_max, all_ep0_p);
 	
 	clear_mpf_content_buffer(&buffer);
 	//*/
 
 	//multiset_buffer = new unsigned int[2*p_max];
 	//combination_buffer = new unsigned int[2*p_max];
 
 
 	//*
 	mpf_content_buffer buffer;
 	init_mpf_content_buffer(&buffer, all_ep0_size);
 
 	mp_exp_t exp;
 	std::cout << "0." << compute_full_tensor_contraction(1,1,1).get_str(exp) << " x10^("<<exp << ")\n";
 	std::cout << "0." << compute_full_tensor_contraction(1,1,1).get_str(exp) << " x10^("<<exp << ")\n";
 	std::cout << "0." << compute_full_tensor_contraction(1,1,1).get_str(exp) << " x10^("<<exp << ")\n";
 	//dump_array_of_mpf_to_buffer(all_ep0_p, &buffer, all_ep0_size);
 
 	//DEBUG_dump_all_ep0(p_max, all_ep0_p);
 	
 	std::ifstream mathematica_csv("tests/contracted_tensors_mathematica.csv");
 	std::string line;
 	while(std::getline(mathematica_csv, line))
 	{
 		std::istringstream iss(line);
 		size_t p, n, pp;
 		char ignore;
 		double value, timing;
 
 		if(! (iss >> p >> ignore >> n >> ignore >> pp >> ignore >> value >> ignore >> timing ) ) { break; }
 
 		//std::cout << "0." << compute_full_tensor_contraction(1,1,1).get_str(exp) << " x10^("<<exp << ")\n";
 		auto t1 = std::chrono::high_resolution_clock::now();
 		
 		mpf_class computed = compute_full_tensor_contraction(p,n,pp);
 		//dump_array_of_mpf_to_buffer(all_ep0_p, &buffer, all_ep0_size);
 
 		auto t2 = std::chrono::high_resolution_clock::now();
 
 		std::chrono::duration<double, std::milli> ms_timing = t2 - t1;
 		
 		//std::cout << "0." << compute_full_tensor_contraction(1,1,1).get_str(exp) << " x10^("<<exp << ")\n";
 		std::cout << "("<< p << ", " << n << ", "<< pp <<") - Error: " << abs(computed - value) // << " --- abs(" << computed << " - " << value << ") ---"
 				  << "\t\t\t Time: " << ms_timing.count() << "ms (old: " << timing*1000 << "ms) x"<< timing*1000/ms_timing.count() << "\n";
 
 		if(p > 4) break;
 	}
 	mathematica_csv.close();
 
 	//DEBUG_dump_all_ep0(p_max, all_ep0_p);
 
 	std::cout << "0." << compute_full_tensor_contraction(1,1,1).get_str(exp) << " x10^("<<exp << ")\n";
 	std::cout << "0." << compute_full_tensor_contraction(1,1,1).get_str(exp) << " x10^("<<exp << ")\n";
 	std::cout << "0." << compute_full_tensor_contraction(1,1,1).get_str(exp) << " x10^("<<exp << ")\n";
 	
 	clear_mpf_content_buffer(&buffer);
 	//*/
 	
 	// TODO(Sam): This could go into tests
 	/*
 	size_t current_p = 0;
 	for(size_t offset = 0; current_p <= 1000000; offset += current_p*current_p*current_p/1000+1)
 	{
 	    current_p = get_ep0_p_from_offseted_idx(offset);
 		size_t offmin = get_ep0_offset_from_p(current_p);
 		size_t offmax = get_ep0_offset_from_p(current_p+1);
 		std::cout << "Verifying " << offmin << " <= " << offset << " < " << offmax << " (p="<<current_p<<")\n";
 		assert(offmin <= offset && offset < offmax);
 	}
 	//*/
 
 	// TODO(Sam): This could go into tests
 	/*
 	size_t p1 = p;
 	size_t p2 = 0;
 	size_t p3 = 0;
 	do {
 		std::printf("- (%zu, %zu, %zu) is located at pos %zu\n", p1, p2, p3, get_ep0_idx(p1,p2,p3));
 		
 		p2 = (++p2)%(p+1-p3);
 		if(p2 == 0)
 			++p3;
 		p1 = p - p2 -p3;
 	} while(p3 <= p);
 
 	std::cout << "---------\n";
 
 	for(size_t idx = 0; idx < get_ep0_size(p); ++idx)
 	{
 		get_ep0_coords(p, idx, &p1, &p2, &p3);
 		std::printf("- idx:%zu corresponds to (%zu, %zu, %zu)\n", idx, p1, p2, p3);
 	}
 	// End of tests
 	//*/
 
 	/*
 	for(size_t i = 0; i < 20; ++i)
 			std::cout << "Coeff for p=" << i << " is " << coeff_Np0(i) << "\n";
 	//*/
 
 	/*
 	for(size_t n = 0; n < 150; n += 10)
 		for(size_t m = 0; m <= n/2; m += 5)
 			std::cout << "[" << n << ", " << m << "] = " << arrangement_factor(n, m) << "\n";
 	//*/
 	
 	//delete[] combination_buffer;
 	//delete[] multiset_buffer;
 
 	//delete[] ep0_p;
 	delete[] all_ep0_p;
 	
 	return 0;
 }