diff --git a/src/mpi/mpi_interface.hh b/src/mpi/mpi_interface.hh
index 93b99ab..74f3a80 100644
--- a/src/mpi/mpi_interface.hh
+++ b/src/mpi/mpi_interface.hh
@@ -1,226 +1,236 @@
/**
* @file
* @section LICENSE
*
* Copyright (©) 2016-2020 EPFL (École Polytechnique Fédérale de Lausanne),
* Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
/* -------------------------------------------------------------------------- */
#ifndef MPI_INTERFACE_HH
#define MPI_INTERFACE_HH
/* -------------------------------------------------------------------------- */
#include "static_types.hh"
#include "tamaas.hh"
#include <thrust/copy.h>
#include <type_traits>
#ifdef USE_MPI
#include <mpi.h>
#endif
/* -------------------------------------------------------------------------- */
namespace tamaas {
/* -------------------------------------------------------------------------- */
/// Contains mock mpi functions
namespace mpi_dummy {
struct comm {
  static comm world;
};
struct sequential {
- void enter() const {};
- void exit() const {};
+  static void enter() {}
+  static void exit() {}
+};
+
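+/// RAII counterpart of sequential for the mock backend: enter/exit are
+/// no-ops here, but using the guard keeps code portable with the MPI build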
+struct sequential_guard {
+  sequential_guard() { sequential::enter(); }
+  ~sequential_guard() { sequential::exit(); }
};
enum class thread : int { single, funneled, serialized, multiple };
inline bool initialized() { return false; }
inline bool finalized() { return false; }
inline int init(int*, char***) { return 0; }
inline int init_thread(int*, char***, thread, thread* provided) {
  *provided = thread::funneled;
  return 0;
}
inline int finalize() { return 0; }
inline int rank(comm = comm::world) { return 0; }
inline int size(comm = comm::world) { return 1; }
template <operation op = operation::plus, typename T>
inline decltype(auto) reduce(T&& val, comm = comm::world) {
  return std::forward<T>(val);
}
template <operation op = operation::plus, typename T>
inline decltype(auto) allreduce(T&& val, comm = comm::world) {
  return std::forward<T>(val);
}
template <typename T>
inline decltype(auto) gather(const T* send, T* recv, int count,
                             comm = comm::world) {
  if (send == recv)
    return;
  thrust::copy_n(send, count, recv);
}
} // namespace mpi_dummy
/* -------------------------------------------------------------------------- */
#ifdef USE_MPI
/// Contains real mpi functions
namespace mpi_impl {
/// MPI_Comm wrapper
struct comm {
  MPI_Comm _comm;
  operator MPI_Comm() { return _comm; }
  static comm world;
};
struct sequential {
- void enter() const { comm::world._comm = MPI_COMM_SELF; }
- void exit() const { comm::world._comm = MPI_COMM_WORLD; }
+  static void enter() { comm::world._comm = MPI_COMM_SELF; }
+  static void exit() { comm::world._comm = MPI_COMM_WORLD; }
+};
+
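+/// RAII guard around a sequential section: points comm::world at
+/// MPI_COMM_SELF on construction and restores MPI_COMM_WORLD on destruction.
+/// Guards should not be nested, since the destructor unconditionally
+/// restores MPI_COMM_WORLD. Sketch of intended use (do_rank_local_work is
+/// hypothetical):
+/// @code
+/// {
+///   mpi::sequential_guard guard;  // comm::world == MPI_COMM_SELF
+///   do_rank_local_work();         // runs with a single-rank communicator
+/// }                               // comm::world == MPI_COMM_WORLD again
+/// @endcode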
+struct sequential_guard {
+  sequential_guard() { sequential::enter(); }
+  ~sequential_guard() { sequential::exit(); }
};
/// MPI Thread level
enum class thread : int {
  single = MPI_THREAD_SINGLE,
  funneled = MPI_THREAD_FUNNELED,
  serialized = MPI_THREAD_SERIALIZED,
  multiple = MPI_THREAD_MULTIPLE
};
template <typename T>
struct type_trait;

#define TYPE(t, mpi_t)                                                         \
  template <>                                                                  \
  struct type_trait<t> {                                                       \
    static constexpr MPI_Datatype value = mpi_t;                               \
  }
TYPE(double, MPI_DOUBLE);
TYPE(int, MPI_INT);
TYPE(unsigned int, MPI_UNSIGNED);
TYPE(long double, MPI_LONG_DOUBLE);
TYPE(long, MPI_LONG);
TYPE(unsigned long, MPI_UNSIGNED_LONG);
TYPE(::thrust::complex<double>, MPI_CXX_DOUBLE_COMPLEX);
TYPE(::thrust::complex<long double>, MPI_CXX_LONG_DOUBLE_COMPLEX);
TYPE(bool, MPI_CXX_BOOL);
#undef TYPE
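// Example: type_trait<double>::value resolves to MPI_DOUBLE at compile time.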
template <operation op>
struct operation_trait;

#define OPERATION(op, mpi_op)                                                  \
  template <>                                                                  \
  struct operation_trait<operation::op> {                                      \
    static constexpr MPI_Op value = mpi_op;                                    \
  }
OPERATION(plus, MPI_SUM);
OPERATION(min, MPI_MIN);
OPERATION(max, MPI_MAX);
OPERATION(times, MPI_PROD);
#undef OPERATION
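// Example: operation_trait<operation::plus>::value resolves to MPI_SUM.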
inline bool initialized() {
  int has_init;
  MPI_Initialized(&has_init);
  return has_init != 0;
}
inline bool finalized() {
  int has_final;
  MPI_Finalized(&has_final);
  return has_final != 0;
}
inline int init(int* argc, char*** argv) { return MPI_Init(argc, argv); }
inline int init_thread(int* argc, char*** argv, thread required,
                       thread* provided) {
  return MPI_Init_thread(argc, argv, static_cast<int>(required),
                         reinterpret_cast<int*>(provided));
}
inline int finalize() { return MPI_Finalize(); }
inline int rank(comm communicator = comm::world) {
  int rank;
  MPI_Comm_rank(communicator, &rank);
  return rank;
}
inline int size(comm communicator = comm::world) {
  int size;
  MPI_Comm_size(communicator, &size);
  return size;
}
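/// Reduce a scalar onto rank 0; the returned value is only meaningful on the
/// root rank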
template <operation op = operation::plus, typename T>
inline decltype(auto) reduce(T val, comm communicator = comm::world) {
  T res{};
  MPI_Reduce(&val, &res, 1, type_trait<T>::value, operation_trait<op>::value,
             0, communicator);
  return res;
}
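/// Reduce a scalar across all ranks; every rank receives the reduced value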
template <operation op = operation::plus, typename T,
          typename = std::enable_if_t<std::is_same<T, Real>::value or
                                      std::is_same<T, UInt>::value>>
inline decltype(auto) allreduce(T val, comm communicator = comm::world) {
  T res{};
  MPI_Allreduce(&val, &res, 1, type_trait<T>::value,
                operation_trait<op>::value, communicator);
  return res;
}
template <operation op = operation::plus, typename T, UInt n>
inline decltype(auto) allreduce(const StaticVector<T, n>& v,
                                comm communicator = comm::world) {
  Vector<T, n> res;
  MPI_Allreduce(v.begin(), res.begin(), n, type_trait<T>::value,
                operation_trait<op>::value, communicator);
  return res;
}
template <operation op = operation::plus, typename T, UInt n, UInt m>
inline decltype(auto) allreduce(const StaticMatrix<T, n, m>& v,
                                comm communicator = comm::world) {
  Matrix<T, n, m> res;
  MPI_Allreduce(v.begin(), res.begin(), n * m, type_trait<T>::value,
                operation_trait<op>::value, communicator);
  return res;
}
template <typename T>
inline decltype(auto) gather(const T* send, T* recv, int count,
                             comm communicator = comm::world) {
  MPI_Gather(send, count, type_trait<T>::value, recv, count,
             type_trait<T>::value, 0, communicator);
}
} // namespace mpi_impl
namespace mpi = mpi_impl;
#else
namespace mpi = mpi_dummy;
#endif // USE_MPI
} // namespace tamaas
/* -------------------------------------------------------------------------- */
#endif // MPI_INTERFACE_HH
diff --git a/tests/test_mpi.cpp b/tests/test_mpi.cpp
index d9b44c6..ba7e2d0 100644
--- a/tests/test_mpi.cpp
+++ b/tests/test_mpi.cpp
@@ -1,143 +1,149 @@
/**
* @file
* @section LICENSE
*
* Copyright (©) 2016-2020 EPFL (École Polytechnique Fédérale de Lausanne),
* Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
/* -------------------------------------------------------------------------- */
#include "fftw_mpi_engine.hh"
#include "grid.hh"
#include "grid_hermitian.hh"
#include "grid_view.hh"
#include "partitioner.hh"
#include "test.hh"
#include <numeric>
using namespace tamaas;
using fft = fftw::helper<Real>;
/* -------------------------------------------------------------------------- */
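/// RAII view of an fftw-allocated buffer: iterable like a container, releases
/// the memory with fftw::free on destruction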
template <typename T>
struct span {
  T* ptr;
  std::size_t size;
  ~span() { fftw::free(ptr); }
  const T* begin() const { return ptr; }
  const T* end() const { return ptr + size; }
  T* begin() { return ptr; }
  T* end() { return ptr + size; }
  operator T*() { return ptr; }
};
+/* -------------------------------------------------------------------------- */
+TEST(TestMPIInterface, SequentialGuard) {
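+  // Inside the guard comm::world aliases MPI_COMM_SELF, so each rank sees a
+  // one-process world and size() must report 1 regardless of the launch size.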
+  mpi::sequential_guard guard;
+  ASSERT_EQ(mpi::size(), 1);
+}
+
/* -------------------------------------------------------------------------- */
TEST(TestFFTWInterface, MPISizes) {
  const std::ptrdiff_t N0 = 100, N1 = 100;
  std::ptrdiff_t local_n0, local_n0_start;
  auto alloc_local = fftw_mpi_local_size_2d(N0, N1 / 2 + 1, mpi::comm::world,
                                            &local_n0, &local_n0_start);
  const std::ptrdiff_t N[] = {N0, N1 / 2 + 1};
  auto sizes = fftw::mpi::local_size_many(2, N, 1);
  ASSERT_EQ(std::get<0>(sizes), alloc_local);
  ASSERT_EQ(std::get<1>(sizes), local_n0);
  ASSERT_EQ(std::get<2>(sizes), local_n0_start);
}
/* -------------------------------------------------------------------------- */
TEST(TestPartitioner, LocalSizes) {
  const std::ptrdiff_t N0 = 100, N1 = 100;
  std::ptrdiff_t local_n0, local_n0_start;
  fftw_mpi_local_size_2d(N0, N1 / 2 + 1, mpi::comm::world, &local_n0,
                         &local_n0_start);
  auto local_size = Partitioner<2>::local_size({N0, N1 / 2 + 1});
  ASSERT_EQ(local_size[0], local_n0);
  ASSERT_EQ(local_size[1], N1 / 2 + 1);
  ASSERT_EQ(Partitioner<2>::local_offset({N0, N1 / 2 + 1}), local_n0_start);
}
/* -------------------------------------------------------------------------- */
TEST(TestPartitioner, LocalOffsetInGrid) {
  std::array<UInt, 2> N = {10, 10};
  std::array<UInt, 2> Nglobal = {mpi::allreduce(N.front()), N.back()};
  Grid<Real, 2> local(N, 1), global(Nglobal, 1);
  auto offset = Partitioner<2>::local_offset(local);
  auto local_n0_start = Partitioner<2>::local_offset(Nglobal);
  ASSERT_EQ(offset, &global(local_n0_start, 0) - &global(0, 0));
}
/* -------------------------------------------------------------------------- */
TEST(TestFFTWMPIEngine, BasicTransform) {
  const std::ptrdiff_t N0 = 100, N1 = 100;
  auto local_size = Partitioner<2>::local_size({N0, N1 / 2 + 1});
  Grid<Real, 2> real({local_size[0], N1}, 1);
  GridHermitian<Real, 2> spectral(local_size, 1);
  auto offset = Partitioner<2>::local_offset(real);
  std::iota(real.begin(), real.end(), offset);
  FFTWMPIEngine().forward(real, spectral);
  auto gathered = Partitioner<2>::gather(spectral);
  if (mpi::rank() != 0)
    return;
  Grid<Real, 2> real_global({N0, N1}, 1);
  GridHermitian<Real, 2> solution({N0, N1 / 2 + 1}, 1);
  std::iota(real_global.begin(), real_global.end(), 0);
  FFTWEngine().forward(real_global, solution);
  ASSERT_TRUE(compare(gathered, solution, AreComplexEqual()));
}
/* -------------------------------------------------------------------------- */
TEST(TestFFTWMPIEngine, ComponentsTransform) {
  const std::ptrdiff_t N0 = 100, N1 = 100;
  auto local_size = Partitioner<2>::local_size({N0, N1 / 2 + 1});
  Grid<Real, 2> real({local_size[0], N1}, 2);
  GridHermitian<Real, 2> spectral(local_size, 2);
  auto offset = Partitioner<2>::local_offset(real);
  std::iota(real.begin(), real.end(), offset);
  FFTWMPIEngine().forward(real, spectral);
  auto gathered = Partitioner<2>::gather(spectral);
  if (mpi::rank() != 0)
    return;
  Grid<Real, 2> real_global({N0, N1}, 2);
  GridHermitian<Real, 2> solution({N0, N1 / 2 + 1}, 2);
  std::iota(real_global.begin(), real_global.end(), 0);
  FFTWEngine().forward(real_global, solution);
  ASSERT_TRUE(compare(gathered, solution, AreComplexEqual()));
}