diff --git a/src/common/ccoord_operations.hh b/src/common/ccoord_operations.hh index 28f9d51..4e502aa 100644 --- a/src/common/ccoord_operations.hh +++ b/src/common/ccoord_operations.hh @@ -1,320 +1,320 @@ /** * @file ccoord_operations.hh * * @author Till Junge * * @date 29 Sep 2017 * * @brief common operations on pixel addressing * * Copyright © 2017 Till Junge * * µSpectre is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation, either version 3, or (at * your option) any later version. * * µSpectre is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU Emacs; see the file COPYING. If not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #include #include #include #include #include #include "common/common.hh" #include "common/iterators.hh" #ifndef CCOORD_OPERATIONS_H #define CCOORD_OPERATIONS_H namespace muSpectre { namespace CcoordOps { namespace internal { //! simple helper returning the first argument and ignoring the second template constexpr T ret(T val, size_t /*dummy*/) {return val;} //! helper to build cubes template constexpr std::array cube_fun(T val, std::index_sequence) { return std::array{ret(val, I)...}; } //! computes hermitian size according to FFTW template constexpr Ccoord_t herm(const Ccoord_t & full_sizes, std::index_sequence) { return Ccoord_t{full_sizes[I]..., full_sizes.back()/2+1}; } //! compute the stride in a direction of a row-major grid template constexpr Dim_t stride(const Ccoord_t & sizes, const size_t index) { static_assert(Dim > 0, "only for positive numbers of dimensions"); auto const diff{Dim - 1 - Dim_t(index)}; Dim_t ret_val{1}; for (Dim_t i{0}; i < diff; ++i) { ret_val *= sizes[Dim-1-i]; } return ret_val; } //! get all strides from a row-major grid (helper function) template constexpr Ccoord_t compute_strides(const Ccoord_t & sizes, std::index_sequence) { return Ccoord_t{stride(sizes, I)...}; } } // internal //----------------------------------------------------------------------------// //! returns a grid of equal resolutions in each direction template constexpr std::array get_cube(T size) { return internal::cube_fun(size, std::make_index_sequence{}); } /* ---------------------------------------------------------------------- */ //! returns the hermition grid to correcsponding to a full grid template constexpr Ccoord_t get_hermitian_sizes(Ccoord_t full_sizes) { return internal::herm(full_sizes, std::make_index_sequence{}); } /* ---------------------------------------------------------------------- */ //! return physical vector of a cell of cubic pixels template Eigen::Matrix get_vector(const Ccoord_t & ccoord, Real pix_size = 1.) { Eigen::Matrix retval; for (size_t i = 0; i < dim; ++i) { retval[i] = pix_size * ccoord[i]; } return retval; } /* ---------------------------------------------------------------------- */ //! return physical vector of a cell of general pixels template Eigen::Matrix get_vector(const Ccoord_t & ccoord, Eigen::Matrix pix_size) { Eigen::Matrix retval = pix_size; for (size_t i = 0; i < dim; ++i) { retval[i] *= ccoord[i]; } return retval; } /* ---------------------------------------------------------------------- */ //! return physical vector of a cell of general pixels template Eigen::Matrix get_vector(const Ccoord_t & ccoord, const std::array& pix_size) { Eigen::Matrix retval{}; for (size_t i = 0; i < dim; ++i) { retval[i] = pix_size[i]*ccoord[i]; } return retval; } /* ---------------------------------------------------------------------- */ //! get all strides from a row-major grid template constexpr Ccoord_t get_default_strides(const Ccoord_t & sizes) { return internal::compute_strides(sizes, std::make_index_sequence{}); } //----------------------------------------------------------------------------// //! get the i-th pixel in a grid of size sizes template constexpr Ccoord_t get_ccoord(const Ccoord_t & resolutions, const Ccoord_t & locations, - Dim_t index, bool transposed=false) { + Dim_t index) { Ccoord_t retval{{0}}; Dim_t factor{1}; for (Dim_t i = dim-1; i >=0; --i) { - if (transposed) { - if (i == 0) { - retval[1] = index/factor%resolutions[0] + locations[0]; - } - else if (i == 1) { - retval[0] = index/factor%resolutions[1] + locations[1]; - } - else { - retval[i] = index/factor%resolutions[i] + locations[i]; - } - } - else { - retval[i] = index/factor%resolutions[i] + locations[i]; - } + retval[i] = index/factor%resolutions[i] + locations[i]; if (i != 0 ) { factor *= resolutions[i]; } } return retval; } + //----------------------------------------------------------------------------// + //! get the i-th pixel in a grid of size sizes + template + constexpr Ccoord_t get_ccoord(const Ccoord_t & resolutions, + const Ccoord_t & locations, + Dim_t index, + std::index_sequence) { + Ccoord_t ccoord{get_ccoord(resolutions, locations, index)}; + return Ccoord_t({ccoord[I]...}); + } + //----------------------------------------------------------------------------// //! get the linear index of a pixel in a given grid template constexpr Dim_t get_index(const Ccoord_t & sizes, const Ccoord_t & locations, const Ccoord_t & ccoord) { Dim_t retval{0}; Dim_t factor{1}; for (Dim_t i = dim-1; i >=0; --i) { retval += (ccoord[i]-locations[i])*factor; if (i != 0) { factor *= sizes[i]; } } return retval; } //----------------------------------------------------------------------------// //! get the linear index of a pixel given a set of strides template constexpr Dim_t get_index_from_strides(const Ccoord_t & strides, const Ccoord_t & ccoord) { Dim_t retval{0}; for (const auto & tup: akantu::zip(strides, ccoord)) { const auto & stride = std::get<0>(tup); const auto & ccord_ = std::get<1>(tup); retval += stride * ccord_; } return retval; } //----------------------------------------------------------------------------// //! get the number of pixels in a grid template constexpr size_t get_size(const Ccoord_t& sizes) { Dim_t retval{1}; for (size_t i = 0; i < dim; ++i) { retval *= sizes[i]; } return retval; } //----------------------------------------------------------------------------// //! get the number of pixels in a grid given its strides template constexpr size_t get_size_from_strides(const Ccoord_t& sizes, const Ccoord_t& strides) { return sizes[0]*strides[0]; } /* ---------------------------------------------------------------------- */ /** * centralises iterating over square (or cubic) discretisation grids */ - template + template class Pixels { public: //! constructor Pixels(const Ccoord_t & resolutions=Ccoord_t{}, const Ccoord_t & locations=Ccoord_t{}) :resolutions{resolutions}, locations{locations}{}; //! copy constructor Pixels(const Pixels & other) = default; //! assignment operator Pixels & operator=(const Pixels & other) = default; virtual ~Pixels() = default; /** * iterators over `Pixels` dereferences to cell coordinates */ class iterator { public: using value_type = Ccoord_t; //!< stl conformance using const_value_type = const value_type; //!< stl conformance using pointer = value_type*; //!< stl conformance using difference_type = std::ptrdiff_t; //!< stl conformance using iterator_category = std::forward_iterator_tag;//!resolutions);} protected: Ccoord_t resolutions; //!< resolutions of this domain Ccoord_t locations; //!< locations of this domain }; /* ---------------------------------------------------------------------- */ - template - Pixels::iterator::iterator(const Pixels & pixels, bool begin) + template + Pixels::iterator::iterator(const Pixels & pixels, bool begin) :pixels{pixels}, index{begin? 0: get_size(pixels.resolutions)} {} /* ---------------------------------------------------------------------- */ - template - typename Pixels::iterator::value_type - Pixels::iterator::operator*() const { + template + typename Pixels::iterator::value_type + Pixels::iterator::operator*() const { return get_ccoord(pixels.resolutions, pixels.locations, this->index, - transposed); + std::conditional_t, + std::index_sequence>{}); } /* ---------------------------------------------------------------------- */ - template + template bool - Pixels::iterator::operator!=(const iterator &other) const { + Pixels::iterator::operator!=(const iterator &other) const { return (this->index != other.index) || (&this->pixels != &other.pixels); } /* ---------------------------------------------------------------------- */ - template + template bool - Pixels::iterator::operator==(const iterator &other) const { + Pixels::iterator::operator==(const iterator &other) const { return !(*this!= other); } /* ---------------------------------------------------------------------- */ - template - typename Pixels::iterator& - Pixels::iterator::operator++() { + template + typename Pixels::iterator& + Pixels::iterator::operator++() { ++this->index; return *this; } } // CcoordOps } // muSpectre #endif /* CCOORD_OPERATIONS_H */ diff --git a/src/fft/fftwmpi_engine.cc b/src/fft/fftwmpi_engine.cc index 7d17f1f..1bf7c6f 100644 --- a/src/fft/fftwmpi_engine.cc +++ b/src/fft/fftwmpi_engine.cc @@ -1,223 +1,227 @@ /** * @file fftwmpi_engine.cc * * @author Lars Pastewka * * @date 06 Mar 2017 * * @brief implements the MPI-parallel fftw engine * * Copyright © 2017 Till Junge * * µSpectre is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation, either version 3, or (at * your option) any later version. * * µSpectre is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU Emacs; see the file COPYING. If not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #include "common/ccoord_operations.hh" #include "fft/fftwmpi_engine.hh" namespace muSpectre { template int FFTWMPIEngine::nb_engines{0}; template FFTWMPIEngine::FFTWMPIEngine(Ccoord resolutions, Rcoord lengths, Communicator comm) :Parent{resolutions, lengths, comm} { if (!this->nb_engines) fftw_mpi_init(); this->nb_engines++; std::array narr; std::copy(this->domain_resolutions.begin(), this->domain_resolutions.end(), narr.begin()); narr[DimS-1] = this->domain_resolutions[DimS-1]/2+1; ptrdiff_t res_x, loc_x, res_y, loc_y; this->workspace_size = fftw_mpi_local_size_many_transposed(DimS, narr.data(), Field_t::nb_components, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, this->comm.get_mpi_comm(), &res_x, &loc_x, &res_y, &loc_y); this->fourier_resolutions[1] = this->fourier_resolutions[0]; this->fourier_locations[1] = this->fourier_locations[0]; this->resolutions[0] = res_x; this->locations[0] = loc_x; this->fourier_resolutions[0] = res_y; this->fourier_locations[0] = loc_y; for (auto & n: this->resolutions) { if (n == 0) { throw std::runtime_error("FFTW MPI planning returned zero resolution. " "You may need to run on fewer processes."); } } for (auto & n: this->fourier_resolutions) { if (n == 0) { throw std::runtime_error("FFTW MPI planning returned zero Fourier " "resolution. You may need to run on fewer " "processes."); } } - for (auto && pixel: CcoordOps::Pixels(this->fourier_resolutions, - this->fourier_locations)) { + for (auto && pixel: + std::conditional_t< + DimS==2, + CcoordOps::Pixels, + CcoordOps::Pixels + >(this->fourier_resolutions, this->fourier_locations)) { this->work_space_container.add_pixel(pixel); } } /* ---------------------------------------------------------------------- */ template void FFTWMPIEngine::initialise(FFT_PlanFlags plan_flags) { if (this->initialised) { throw std::runtime_error("double initialisation, will leak memory"); } // Initialize parent after local resolutions have been determined and // work space has been initialized Parent::initialise(plan_flags); this->real_workspace = fftw_alloc_real(2*this->workspace_size); // We need to check whether the workspace provided by our field is large // enough. MPI parallel FFTW may request a workspace size larger than the // nominal size of the complex buffer. if (long(this->work.size()*Field_t::nb_components) < this->workspace_size) { this->work.set_pad_size(this->workspace_size - Field_t::nb_components*this->work.size()); } unsigned int flags; switch (plan_flags) { case FFT_PlanFlags::estimate: { flags = FFTW_ESTIMATE; break; } case FFT_PlanFlags::measure: { flags = FFTW_MEASURE; break; } case FFT_PlanFlags::patient: { flags = FFTW_PATIENT; break; } default: throw std::runtime_error("unknown planner flag type"); break; } std::array narr; std::copy(this->domain_resolutions.begin(), this->domain_resolutions.end(), narr.begin()); Real * in = this->real_workspace; fftw_complex * out = reinterpret_cast(this->work.data()); this->plan_fft = fftw_mpi_plan_many_dft_r2c( DimS, narr.data(), Field_t::nb_components, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, in, out, this->comm.get_mpi_comm(), FFTW_MPI_TRANSPOSED_OUT | flags); if (this->plan_fft == nullptr) { throw std::runtime_error("r2c plan failed"); } fftw_complex * i_in = reinterpret_cast(this->work.data()); Real * i_out = this->real_workspace; this->plan_ifft = fftw_mpi_plan_many_dft_c2r( DimS, narr.data(), Field_t::nb_components, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, i_in, i_out, this->comm.get_mpi_comm(), FFTW_MPI_TRANSPOSED_IN | flags); if (this->plan_ifft == nullptr) { throw std::runtime_error("c2r plan failed"); } this->initialised = true; } /* ---------------------------------------------------------------------- */ template FFTWMPIEngine::~FFTWMPIEngine() noexcept { if (this->real_workspace != nullptr) fftw_free(this->real_workspace); if (this->plan_fft != nullptr) fftw_destroy_plan(this->plan_fft); if (this->plan_ifft != nullptr) fftw_destroy_plan(this->plan_ifft); this->nb_engines--; if (!this->nb_engines) fftw_mpi_cleanup(); } /* ---------------------------------------------------------------------- */ template typename FFTWMPIEngine::Workspace_t & FFTWMPIEngine::fft (Field_t & field) { if (this->plan_fft == nullptr) { throw std::runtime_error("fft plan not initialised"); } if (field.size() != CcoordOps::get_size(this->resolutions)) { throw std::runtime_error("size mismatch"); } // Copy non-padded field to padded real_workspace. // Transposed output of M x N x L transform for >= 3 dimensions is padded // M x N x 2*(L/2+1). ptrdiff_t fstride = Field_t::nb_components*this->resolutions[DimS-1]; ptrdiff_t wstride = Field_t::nb_components*2*(this->resolutions[DimS-1]/2+1); ptrdiff_t n = field.size()/this->resolutions[DimS-1]; auto fdata = field.data(); auto wdata = this->real_workspace; for (int i = 0; i < n; i++) { std::copy(fdata, fdata+fstride, wdata); fdata += fstride; wdata += wstride; } // Compute FFT fftw_mpi_execute_dft_r2c( this->plan_fft, this->real_workspace, reinterpret_cast(this->work.data())); return this->work; } /* ---------------------------------------------------------------------- */ template void FFTWMPIEngine::ifft (Field_t & field) const { if (this->plan_ifft == nullptr) { throw std::runtime_error("ifft plan not initialised"); } if (field.size() != CcoordOps::get_size(this->resolutions)) { throw std::runtime_error("size mismatch"); } // Compute inverse FFT fftw_mpi_execute_dft_c2r( this->plan_ifft, reinterpret_cast(this->work.data()), this->real_workspace); // Copy non-padded field to padded real_workspace. // Transposed output of M x N x L transform for >= 3 dimensions is padded // M x N x 2*(L/2+1). ptrdiff_t fstride = Field_t::nb_components*this->resolutions[DimS-1]; ptrdiff_t wstride = Field_t::nb_components*2*(this->resolutions[DimS-1]/2+1); ptrdiff_t n = field.size()/this->resolutions[DimS-1]; auto fdata = field.data(); auto wdata = this->real_workspace; for (int i = 0; i < n; i++) { std::copy(wdata, wdata+fstride, fdata); fdata += fstride; wdata += wstride; } } template class FFTWMPIEngine; template class FFTWMPIEngine; template class FFTWMPIEngine; } // muSpectre diff --git a/src/fft/pfft_engine.cc b/src/fft/pfft_engine.cc index a810d08..b827223 100644 --- a/src/fft/pfft_engine.cc +++ b/src/fft/pfft_engine.cc @@ -1,235 +1,242 @@ /** * @file pfft_engine.cc * * @author Lars Pastewka * * @date 06 Mar 2017 * * @brief implements the MPI-parallel pfft engine * * Copyright © 2017 Till Junge * * µSpectre is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation, either version 3, or (at * your option) any later version. * * µSpectre is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU Emacs; see the file COPYING. If not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #include "common/ccoord_operations.hh" #include "fft/pfft_engine.hh" namespace muSpectre { template int PFFTEngine::nb_engines{0}; template PFFTEngine::PFFTEngine(Ccoord resolutions, Rcoord lengths, Communicator comm) :Parent{resolutions, lengths, comm} { if (!this->nb_engines) pfft_init(); this->nb_engines++; int size{comm.size()}; int dimx{size}; int dimy{1}; - if (DimS > 2) { + //if (DimS > 2) { + if (false) { dimy = int(sqrt(size)); while ((size/dimy)*dimy != size) dimy--; dimx = size/dimy; } std::cout << "PFFT process mesh: " << dimx << "x" << dimy << std::endl; - if (DimS > 2) { + //if (DimS > 2) { + if (false) { if (pfft_create_procmesh_2d(this->comm.get_mpi_comm(), dimx, dimy, &this->mpi_comm)) { throw std::runtime_error("Failed to create 2d PFFT process mesh."); } } else { this->mpi_comm = this->comm.get_mpi_comm(); } // FIXME! Code presently always use a one-dimensional process mesh but // should use a two-dimensional process mesh for three-dimensional data. std::array narr; std::copy(this->domain_resolutions.begin(), this->domain_resolutions.end(), narr.begin()); ptrdiff_t res[DimS], loc[DimS], fres[DimS], floc[DimS]; this->workspace_size = pfft_local_size_many_dft_r2c(DimS, narr.data(), narr.data(), narr.data(), Field_t::nb_components, PFFT_DEFAULT_BLOCK, PFFT_DEFAULT_BLOCKS, this->mpi_comm, PFFT_TRANSPOSED_OUT, res, loc, fres, floc); std::copy(res, res+DimS, this->resolutions.begin()); std::copy(loc, loc+DimS, this->locations.begin()); std::copy(fres, fres+DimS, this->fourier_resolutions.begin()); std::copy(floc, floc+DimS, this->fourier_locations.begin()); - for (int i = 0; i < DimS-1; i++) { + //for (int i = 0; i < DimS-1; i++) { + for (int i = 0; i < 1; i++) { std::swap(this->fourier_resolutions[i], this->fourier_resolutions[i+1]); std::swap(this->fourier_locations[i], this->fourier_locations[i+1]); } for (auto & n: this->resolutions) { if (n == 0) { throw std::runtime_error("PFFT planning returned zero resolution. " "You may need to run on fewer processes."); } } for (auto & n: this->fourier_resolutions) { if (n == 0) { throw std::runtime_error("PFFT planning returned zero Fourier " "resolution. You may need to run on fewer " "processes."); } } - for (auto && pixel: CcoordOps::Pixels(this->fourier_resolutions, - this->fourier_locations)) { + for (auto && pixel: + std::conditional_t< + DimS==2, + CcoordOps::Pixels, + CcoordOps::Pixels + >(this->fourier_resolutions, this->fourier_locations)) { this->work_space_container.add_pixel(pixel); } } /* ---------------------------------------------------------------------- */ template void PFFTEngine::initialise(FFT_PlanFlags plan_flags) { if (this->initialised) { throw std::runtime_error("double initialisation, will leak memory"); } // Initialize parent after local resolutions have been determined and // work space has been initialized Parent::initialise(plan_flags); this->real_workspace = pfft_alloc_real(2*this->workspace_size); // We need to check whether the workspace provided by our field is large // enough. PFFT may request a workspace size larger than the nominal size // of the complex buffer. if (long(this->work.size()*Field_t::nb_components) < this->workspace_size) { this->work.set_pad_size(this->workspace_size - Field_t::nb_components*this->work.size()); } unsigned int flags; switch (plan_flags) { case FFT_PlanFlags::estimate: { flags = PFFT_ESTIMATE; break; } case FFT_PlanFlags::measure: { flags = PFFT_MEASURE; break; } case FFT_PlanFlags::patient: { flags = PFFT_PATIENT; break; } default: throw std::runtime_error("unknown planner flag type"); break; } std::array narr; std::copy(this->domain_resolutions.begin(), this->domain_resolutions.end(), narr.begin()); Real * in = this->real_workspace; pfft_complex * out = reinterpret_cast(this->work.data()); this->plan_fft = pfft_plan_many_dft_r2c(DimS, narr.data(), narr.data(), narr.data(), Field_t::nb_components, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, in, out, this->mpi_comm, PFFT_FORWARD, PFFT_TRANSPOSED_OUT | flags); if (this->plan_fft == nullptr) { throw std::runtime_error("r2c plan failed"); } pfft_complex * i_in = reinterpret_cast(this->work.data()); Real * i_out = this->real_workspace; this->plan_ifft = pfft_plan_many_dft_c2r(DimS, narr.data(), narr.data(), narr.data(), Field_t::nb_components, PFFT_DEFAULT_BLOCKS, PFFT_DEFAULT_BLOCKS, i_in, i_out, this->mpi_comm, PFFT_BACKWARD, PFFT_TRANSPOSED_IN | flags); if (this->plan_ifft == nullptr) { throw std::runtime_error("c2r plan failed"); } this->initialised = true; } /* ---------------------------------------------------------------------- */ template PFFTEngine::~PFFTEngine() noexcept { if (this->real_workspace != nullptr) pfft_free(this->real_workspace); if (this->plan_fft != nullptr) pfft_destroy_plan(this->plan_fft); if (this->plan_ifft != nullptr) pfft_destroy_plan(this->plan_ifft); if (this->mpi_comm != this->comm.get_mpi_comm()) { MPI_Comm_free(&this->mpi_comm); } this->nb_engines--; if (!this->nb_engines) pfft_cleanup(); } /* ---------------------------------------------------------------------- */ template typename PFFTEngine::Workspace_t & PFFTEngine::fft (Field_t & field) { if (!this->plan_fft) { throw std::runtime_error("fft plan not allocated"); } if (field.size() != CcoordOps::get_size(this->resolutions)) { throw std::runtime_error("size mismatch"); } // Copy field data to workspace buffer. This is necessary because workspace // buffer is larger than field buffer. std::copy(field.data(), field.data()+Field_t::nb_components*field.size(), this->real_workspace); pfft_execute_dft_r2c( this->plan_fft, this->real_workspace, reinterpret_cast(this->work.data())); return this->work; } /* ---------------------------------------------------------------------- */ template void PFFTEngine::ifft (Field_t & field) const { if (!this->plan_ifft) { throw std::runtime_error("ifft plan not allocated"); } if (field.size() != CcoordOps::get_size(this->resolutions)) { throw std::runtime_error("size mismatch"); } pfft_execute_dft_c2r( this->plan_ifft, reinterpret_cast(this->work.data()), this->real_workspace); std::copy(this->real_workspace, this->real_workspace+Field_t::nb_components*field.size(), field.data()); } template class PFFTEngine; template class PFFTEngine; template class PFFTEngine; } // muSpectre