diff --git a/src/mpi/mpi_interface.hh b/src/mpi/mpi_interface.hh
index 93b99ab..74f3a80 100644
--- a/src/mpi/mpi_interface.hh
+++ b/src/mpi/mpi_interface.hh
@@ -1,226 +1,236 @@
 /**
  * @file
  * @section LICENSE
  *
  * Copyright (©) 2016-2020 EPFL (École Polytechnique Fédérale de Lausanne),
  * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Affero General Public License as published
  * by the Free Software Foundation, either version 3 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * GNU Affero General Public License for more details.
  *
  * You should have received a copy of the GNU Affero General Public License
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #ifndef MPI_INTERFACE_HH
 #define MPI_INTERFACE_HH
 /* -------------------------------------------------------------------------- */
 #include "static_types.hh"
 #include "tamaas.hh"
 #include <type_traits>
 #ifdef USE_MPI
 #include <mpi.h>
 #endif
 /* -------------------------------------------------------------------------- */

 namespace tamaas {
 /* -------------------------------------------------------------------------- */

 /// Contains mock mpi functions
 namespace mpi_dummy {

 struct comm {
   static comm world;
 };

 struct sequential {
-  void enter() const {};
-  void exit() const {};
+  static void enter(){};
+  static void exit(){};
+};
+
+struct sequential_guard {
+  sequential_guard() { sequential::enter(); }
+  ~sequential_guard() { sequential::exit(); }
 };

 enum class thread : int { single, funneled, serialized, multiple };

 inline bool initialized() { return false; }
 inline bool finalized() { return false; }
 inline int init(int*, char***) { return 0; }
 inline int init_thread(int*, char***, thread, thread* provided) {
   *provided = thread::funneled;
   return 0;
 }
 inline int finalize() { return 0; }
 inline int rank(comm = comm::world) { return 0; }
 inline int size(comm = comm::world) { return 1; }

 template <operation op = operation::plus, typename T>
 inline decltype(auto) reduce(T&& val, comm = comm::world) {
   return std::forward<T>(val);
 }

 template <operation op = operation::plus, typename T>
 inline decltype(auto) allreduce(T&& val, comm = comm::world) {
   return std::forward<T>(val);
 }

 template <typename T>
 inline decltype(auto) gather(const T* send, T* recv, int count,
                              comm = comm::world) {
   if (send == recv)
     return;
   thrust::copy_n(send, count, recv);
 }

 } // namespace mpi_dummy

 /* -------------------------------------------------------------------------- */

 #ifdef USE_MPI

 /// Contains real mpi functions
 namespace mpi_impl {

 /// MPI_Comm wrapper
 struct comm {
   MPI_Comm _comm;
   operator MPI_Comm() { return _comm; }
   static comm world;
 };

 struct sequential {
-  void enter() const { comm::world._comm = MPI_COMM_SELF; }
-  void exit() const { comm::world._comm = MPI_COMM_WORLD; }
+  static void enter() { comm::world._comm = MPI_COMM_SELF; }
+  static void exit() { comm::world._comm = MPI_COMM_WORLD; }
+};
+
+struct sequential_guard {
+  sequential_guard() { sequential::enter(); }
+  ~sequential_guard() { sequential::exit(); }
 };

 /// MPI Thread level
 enum class thread : int {
   single = MPI_THREAD_SINGLE,
   funneled = MPI_THREAD_FUNNELED,
   serialized = MPI_THREAD_SERIALIZED,
   multiple = MPI_THREAD_MULTIPLE
 };

 template <typename T>
 struct type_trait;

 #define TYPE(t, mpi_t)                                                         \
   template <>                                                                  \
   struct type_trait<t> {                                                       \
     static constexpr MPI_Datatype value = mpi_t;                               \
   }

 TYPE(double, MPI_DOUBLE);
 TYPE(int, MPI_INT);
 TYPE(unsigned int, MPI_UNSIGNED);
 TYPE(long double, MPI_LONG_DOUBLE);
 TYPE(long, MPI_LONG);
 TYPE(unsigned long, MPI_UNSIGNED_LONG);
 TYPE(::thrust::complex<double>, MPI_CXX_DOUBLE_COMPLEX);
 TYPE(::thrust::complex<long double>, MPI_CXX_LONG_DOUBLE_COMPLEX);
 TYPE(bool, MPI_CXX_BOOL);
 #undef TYPE

 template <operation op>
 struct operation_trait;

 #define OPERATION(op, mpi_op)                                                  \
   template <>                                                                  \
   struct operation_trait<operation::op> {                                      \
     static constexpr MPI_Op value = mpi_op;                                    \
   }

 OPERATION(plus, MPI_SUM);
 OPERATION(min, MPI_MIN);
 OPERATION(max, MPI_MAX);
 OPERATION(times, MPI_PROD);
 #undef OPERATION

 inline bool initialized() {
   int has_init;
   MPI_Initialized(&has_init);
   return has_init != 0;
 }

 inline bool finalized() {
   int has_final;
   MPI_Finalized(&has_final);
   return has_final != 0;
 }

 inline int init(int* argc, char*** argv) { return MPI_Init(argc, argv); }
 inline int init_thread(int* argc, char*** argv, thread required,
                        thread* provided) {
   return MPI_Init_thread(argc, argv, static_cast<int>(required),
                          reinterpret_cast<int*>(provided));
 }
 inline int finalize() { return MPI_Finalize(); }

 inline int rank(comm communicator = comm::world) {
   int rank;
   MPI_Comm_rank(communicator, &rank);
   return rank;
 }

 inline int size(comm communicator = comm::world) {
   int size;
   MPI_Comm_size(communicator, &size);
   return size;
 }

 template <operation op = operation::plus, typename T>
 inline decltype(auto) reduce(T val, comm communicator = comm::world) {
   MPI_Reduce(&val, &val, 1, type_trait<T>::value, operation_trait<op>::value,
              0, communicator);
   return val;
 }

 template <operation op = operation::plus, typename T,
           typename = std::enable_if_t<std::is_arithmetic<T>::value or
                                       std::is_same<T, bool>::value>>
 inline decltype(auto) allreduce(T val, comm communicator = comm::world) {
   MPI_Allreduce(&val, &val, 1, type_trait<T>::value,
                 operation_trait<op>::value, communicator);
   return val;
 }

 template <operation op = operation::plus, typename T, UInt n>
 inline decltype(auto) allreduce(const StaticVector<T, n>& v,
                                 comm communicator = comm::world) {
   Vector<T, n> res;
   MPI_Allreduce(v.begin(), res.begin(), n, type_trait<T>::value,
                 operation_trait<op>::value, communicator);
   return res;
 }

 template <operation op = operation::plus, typename T, UInt n, UInt m>
 inline decltype(auto) allreduce(const StaticMatrix<T, n, m>& v,
                                 comm communicator = comm::world) {
   Matrix<T, n, m> res;
   MPI_Allreduce(v.begin(), res.begin(), n * m, type_trait<T>::value,
                 operation_trait<op>::value, communicator);
   return res;
 }

 template <typename T>
 inline decltype(auto) gather(const T* send, T* recv, int count,
                              comm communicator = comm::world) {
   MPI_Gather(send, count, type_trait<T>::value, recv, count,
              type_trait<T>::value, 0, communicator);
 }

 } // namespace mpi_impl

 namespace mpi = mpi_impl;

 #else

 namespace mpi = mpi_dummy;

 #endif // USE_MPI

 } // namespace tamaas
 /* -------------------------------------------------------------------------- */
 #endif // MPI_INTERFACE_HH
diff --git a/tests/test_mpi.cpp b/tests/test_mpi.cpp
index d9b44c6..ba7e2d0 100644
--- a/tests/test_mpi.cpp
+++ b/tests/test_mpi.cpp
@@ -1,143 +1,149 @@
 /**
  * @file
  * @section LICENSE
  *
  * Copyright (©) 2016-2020 EPFL (École Polytechnique Fédérale de Lausanne),
  * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Affero General Public License as published
  * by the Free Software Foundation, either version 3 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * GNU Affero General Public License for more details.
  *
  * You should have received a copy of the GNU Affero General Public License
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #include "fftw_mpi_engine.hh"
 #include "grid.hh"
 #include "grid_hermitian.hh"
 #include "grid_view.hh"
 #include "partitioner.hh"
 #include "test.hh"
 #include <numeric>

 using namespace tamaas;
 using fft = fftw::helper<Real>;

 /* -------------------------------------------------------------------------- */
 template <typename T>
 struct span {
   T* ptr;
   std::size_t size;

   ~span() { fftw::free(ptr); }

   const T* begin() const { return ptr; }
   const T* end() const { return ptr + size; }
   T* begin() { return ptr; }
   T* end() { return ptr + size; }
   operator T*() { return ptr; }
 };

+/* -------------------------------------------------------------------------- */
+TEST(TestMPIInterface, SequentialGuard) {
+  mpi::sequential_guard guard;
+  ASSERT_EQ(mpi::size(), 1);
+}
+
 /* -------------------------------------------------------------------------- */
 TEST(TestFFTWInterface, MPISizes) {
   const std::ptrdiff_t N0 = 100, N1 = 100;
   std::ptrdiff_t local_n0, local_n0_start;
   auto alloc_local = fftw_mpi_local_size_2d(N0, N1 / 2 + 1, mpi::comm::world,
                                             &local_n0, &local_n0_start);

   const std::ptrdiff_t N[] = {N0, N1 / 2 + 1};
   auto sizes = fftw::mpi::local_size_many(2, N, 1);
   ASSERT_EQ(std::get<0>(sizes), alloc_local);
   ASSERT_EQ(std::get<1>(sizes), local_n0);
   ASSERT_EQ(std::get<2>(sizes), local_n0_start);
 }

 /* -------------------------------------------------------------------------- */
 TEST(TestPartitioner, LocalSizes) {
   const std::ptrdiff_t N0 = 100, N1 = 100;
   std::ptrdiff_t local_n0, local_n0_start;
   fftw_mpi_local_size_2d(N0, N1 / 2 + 1, mpi::comm::world, &local_n0,
                          &local_n0_start);

   auto local_size = Partitioner<2>::local_size({N0, N1 / 2 + 1});
   ASSERT_EQ(local_size[0], local_n0);
   ASSERT_EQ(local_size[1], N1 / 2 + 1);
   ASSERT_EQ(Partitioner<2>::local_offset({N0, N1 / 2 + 1}), local_n0_start);
 }

 /* -------------------------------------------------------------------------- */
 TEST(TestPartitioner, LocalOffsetInGrid) {
   std::array<UInt, 2> N = {10, 10};
   std::array<UInt, 2> Nglobal = {mpi::allreduce(N.front()), N.back()};
   Grid<Real, 2> local(N, 1), global(Nglobal, 1);
   auto offset = Partitioner<2>::local_offset(local);
   auto local_n0_start = Partitioner<2>::local_offset(Nglobal);
   ASSERT_EQ(offset, &global(local_n0_start, 0) - &global(0, 0));
 }

 /* -------------------------------------------------------------------------- */
 TEST(TestFFTWMPIEngine, BasicTransform) {
   const std::ptrdiff_t N0 = 100, N1 = 100;
   auto local_size = Partitioner<2>::local_size({N0, N1 / 2 + 1});
   Grid<Real, 2> real({local_size[0], N1}, 1);
   GridHermitian<Real, 2> spectral(local_size, 1);

   auto offset = Partitioner<2>::local_offset(real);
   std::iota(real.begin(), real.end(), offset);

   FFTWMPIEngine().forward(real, spectral);

   auto gathered = Partitioner<2>::gather(spectral);

   if (mpi::rank() != 0)
     return;

   Grid<Real, 2> real_global({N0, N1}, 1);
   GridHermitian<Real, 2> solution({N0, N1 / 2 + 1}, 1);
   std::iota(real_global.begin(), real_global.end(), 0);
   FFTWEngine().forward(real_global, solution);

   ASSERT_TRUE(compare(gathered, solution, AreComplexEqual()));
 }

 TEST(TestFFTWMPIEngine, ComponentsTransform) {
   const std::ptrdiff_t N0 = 100, N1 = 100;
   auto local_size = Partitioner<2>::local_size({N0, N1 / 2 + 1});
   Grid<Real, 2> real({local_size[0], N1}, 2);
   GridHermitian<Real, 2> spectral(local_size, 2);

   auto offset = Partitioner<2>::local_offset(real);
   std::iota(real.begin(), real.end(), offset);

   FFTWMPIEngine().forward(real, spectral);

   auto gathered = Partitioner<2>::gather(spectral);

   if (mpi::rank() != 0)
     return;

   Grid<Real, 2> real_global({N0, N1}, 2);
   GridHermitian<Real, 2> solution({N0, N1 / 2 + 1}, 2);
   std::iota(real_global.begin(), real_global.end(), 0);
   FFTWEngine().forward(real_global, solution);

   ASSERT_TRUE(compare(gathered, solution, AreComplexEqual()));
 }
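
Note on the change: `sequential::enter()` / `sequential::exit()` become static and gain an RAII wrapper, `sequential_guard`, so that with MPI enabled `comm::world` is switched to `MPI_COMM_SELF` for the lifetime of the guard and restored to `MPI_COMM_WORLD` when the scope exits, including on early return or exception. A minimal usage sketch under those assumptions follows; the `rank_local_work` function and its body are hypothetical, for illustration only:

    #include "mpi_interface.hh"
    #include <cassert>

    using namespace tamaas;

    void rank_local_work() {
      mpi::sequential_guard guard;  // enter(): comm::world -> MPI_COMM_SELF
      assert(mpi::size() == 1);     // collectives now see a single-rank world
      // ... per-rank I/O or serial computation ...
    }  // ~sequential_guard(): exit() restores MPI_COMM_WORLD

The added `TestMPIInterface.SequentialGuard` test asserts exactly this invariant (`mpi::size() == 1` inside the guard), which also holds trivially in the `mpi_dummy` build, where the guard is a no-op and `size()` always returns 1.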