diff --git a/poisson/.solutions-cartesian-alltoall/AUTHORS b/poisson/.solutions-cartesian-alltoall/AUTHORS new file mode 100644 index 0000000..9253635 --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/AUTHORS @@ -0,0 +1,4 @@ +Authors: + - Nicolas Richart + - Vincent Keller + . Vittoria Rezzonico diff --git a/poisson/.solutions-cartesian-alltoall/COPYING b/poisson/.solutions-cartesian-alltoall/COPYING new file mode 100644 index 0000000..e551330 --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/COPYING @@ -0,0 +1,24 @@ +Copyright (c) 2017, Ecole Polytechnique Federal de Lausanne - Scientific IT and +Application Support (SCITAS) All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/poisson/.solutions-cartesian-alltoall/Makefile b/poisson/.solutions-cartesian-alltoall/Makefile new file mode 100644 index 0000000..00fdc48 --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/Makefile @@ -0,0 +1,21 @@ +CXX=mpic++ +#CXX=g++ +LD=${CXX} +#VERIFY=-fsanitize=thread +VERIFY= -D_DEBUG -D_DEBUG_MPI +OPENMP=-fopenmp +NOOPENMP=-fno-openmp -Wno-unknown-pragmas +#OPTIM+=-O3 -D_MPI $(NOOPENMP) +OPTIM+=-O3 -D_MPI $(NOOPENMP) -march=native -g +CXXFLAGS+=-Wall -Wextra -Werror -pedantic -std=c++14 -g $(OPTIM) $(VERIFY) +LDFLAGS+=-lm $(CXXFLAGS) + +OBJS=poisson.o grid.o double_buffer.o simulation.o dumpers.o + +all: poisson + +poisson: $(OBJS) + $(LD) -o $@ $(OBJS) $(LDFLAGS) + +clean: + rm -f hello poisson *.o *~ diff --git a/poisson/.solutions-cartesian-alltoall/double_buffer.cc b/poisson/.solutions-cartesian-alltoall/double_buffer.cc new file mode 100644 index 0000000..978eda5 --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/double_buffer.cc @@ -0,0 +1,31 @@ +/* -------------------------------------------------------------------------- */ +#include "double_buffer.hh" +#include "grid.hh" +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +DoubleBuffer::DoubleBuffer(int m, int n) + : m_current(new Grid(m, n)), m_old(new Grid(m, n)) {} + +/* -------------------------------------------------------------------------- */ +Grid & DoubleBuffer::current() { return *m_current; } + +/* -------------------------------------------------------------------------- */ +Grid & DoubleBuffer::old() { return *m_old; } + +/* -------------------------------------------------------------------------- */ +const Grid & DoubleBuffer::current() const { return *m_current; } + +/* -------------------------------------------------------------------------- */ +const Grid & DoubleBuffer::old() const { return *m_old; } + +/* -------------------------------------------------------------------------- */ +void DoubleBuffer::swap() { + m_current.swap(m_old); +} + +/* -------------------------------------------------------------------------- */ +void DoubleBuffer::resize(int m, int n) { + m_current->resize(m, n); + m_old->resize(m, n); +} diff --git a/poisson/.solutions-cartesian-alltoall/double_buffer.hh b/poisson/.solutions-cartesian-alltoall/double_buffer.hh new file mode 100644 index 0000000..1d21b5c --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/double_buffer.hh @@ -0,0 +1,28 @@ +#ifndef DOUBLE_BUFFER +#define DOUBLE_BUFFER + +/* -------------------------------------------------------------------------- */ +#include +/* -------------------------------------------------------------------------- */ +#include "grid.hh" +/* -------------------------------------------------------------------------- */ + +class DoubleBuffer { +public: + DoubleBuffer(int m = 0 , int n = 0); + + void resize(int m, int n); + Grid & current(); + Grid & old(); + + const Grid & current() const; + const Grid & old() const; + + void swap(); +private: + + std::unique_ptr m_current; + std::unique_ptr m_old; +}; + +#endif /* DOUBLE_BUFFER */ diff --git a/poisson/.solutions-cartesian-alltoall/dumpers.cc b/poisson/.solutions-cartesian-alltoall/dumpers.cc new file mode 100644 index 0000000..c626753 --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/dumpers.cc @@ -0,0 +1,205 @@ +/* -------------------------------------------------------------------------- */ +#include "dumpers.hh" +#include "grid.hh" +/* -------------------------------------------------------------------------- */ +#include +#include +#include +#include +#include +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +void Dumper::set_min(float min) { m_min = min; } + +void Dumper::set_max(float max) { m_max = max; } + +float Dumper::min() const { return m_min; } + +float Dumper::max() const { return m_max; } + +/* -------------------------------------------------------------------------- */ +void DumperASCII::dump(const Grid & grid, int step) { + std::ofstream fout; + std::stringstream sfilename; + + sfilename << "out_" << std::setfill('0') << std::setw(5) << step << ".pgm"; + fout.open(sfilename.str()); + + int m = grid.m(); + int n = grid.n(); + + fout << "P2" << std::endl << "# CREATOR: Poisson program" << std::endl; + fout << m << " " << n << std::endl; + fout << 255 << std::endl; + + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + int v = 255. * (grid(i, j) - m_min) / (m_max - m_min); + v = std::min(v, 255); + fout << v << std::endl; + } + } +} + +/* -------------------------------------------------------------------------- */ +void DumperBinary::dump(const Grid & grid, int step) { + std::stringstream sfilename; + + std::array coords{0, 0}, dims{0, 0}; + + int prank{0}, psize{1}; + MPI_Comm_rank(m_communicator, &prank); + MPI_Comm_size(m_communicator, &psize); + + std::array periods{0, 0}; + MPI_Cart_get(m_communicator, dims.size(), dims.data(), periods.data(), coords.data()); + + sfilename << "out_" << std::setfill('0') << std::setw(5) << step << ".bmp"; + + int h = grid.m(); + int w = grid.n(); + int global_w = w; + + std::array starts = {0, 0}; + std::array local_sizes = {h, w}; + std::array global_sizes = local_sizes; + + // removing the ghosts + if(prank == 0) std::cout << dims[0] << " " << dims[1] << "\n"; + + for (std::size_t i = 0; i < local_sizes.size(); ++i) { + if (dims[i] > 1) { + local_sizes[i] -= (coords[i] == 0 || coords[i] == dims[i] - 1 ? 1 : 2); + starts[i] += (coords[i] == 0 ? 0 : 1); + } + } + + std::array comms; + + // determining the local offset + std::array offset = {0, 0}; + + std::vector> line_col_size(2); + + for (size_t i = 0; i < line_col_size.size(); ++i) { + std::array remain_dims = {false, false}; + remain_dims[i] = true; + MPI_Cart_sub(m_communicator, remain_dims.data(), &(comms[i])); + + line_col_size[i].resize(dims[i]); + MPI_Allgather(&local_sizes[i], 1, MPI_INT, line_col_size[i].data(), 1, + MPI_INT, comms[i]); + + global_sizes[i] = 0; + for (int j = 0; j < dims[i]; ++j) { + global_sizes[i] += line_col_size[i][j]; + } + } + + //for (int i = dims[0] - 1; i > coords[0]; --i) { + for (int i = 0; i < coords[0]; ++i) { + offset[0] += line_col_size[0][i]; + } + + for (int i = 0; i < coords[1]; ++i) { + offset[1] += line_col_size[1][i]; + } + + global_w = 4 * ((24 * global_sizes[1] + 31) / 32); + + // if the file width (3*w) is not a multiple of 4 adds enough bytes to make it + // a multiple of 4 + int padding = global_w - 3 * global_sizes[1]; + + w = 3 * local_sizes[1]; + + if (coords[1] == dims[1] - 1) { + w += padding; + } else { + padding = 0; + } + + int filesize = 54 + global_sizes[0] * global_w; + + std::vector img(local_sizes[0] * w); + std::fill(img.begin(), img.end(), 0); + + for (int i = 0; i < local_sizes[0]; i++) { + for (int j = 0; j < local_sizes[1]; j++) { + float v = + ((grid(i + starts[0], j + starts[1]) - m_min) / (m_max - m_min)); + + float r = v * 255; // Red channel + float g = v * 255; // Green channel + float b = prank * 255 / psize; // Red channel + + r = std::min(r, 255.f); + g = std::min(g, 255.f); + b = std::min(b, 255.f); + + img[w * i + 3 * j + 0] = r; + img[w * i + 3 * j + 1] = g; + img[w * i + 3 * j + 2] = b; + } + } + + auto write_bytes = [](auto && array, auto && value) { + array[0] = value; + array[1] = value >> 8; + array[2] = value >> 16; + array[3] = value >> 24; + }; + + std::array bmpfileheader = {'B', 'M', 0, 0, 0, 0, 0, + 0, 0, 0, 54, 0, 0, 0}; + std::array bmpinfoheader = {40, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 24, 0}; + + write_bytes(&bmpfileheader[2], filesize); + + write_bytes(&bmpinfoheader[4], global_sizes[1]); + write_bytes(&bmpinfoheader[8], global_sizes[0]); + write_bytes(&bmpinfoheader[20], filesize - 54); + + MPI_File fh; + MPI_Status status; + MPI_Datatype subarray_t; + +#if defined(_DEBUG_MPI) + for (int i = 0; i < psize; ++i) { + if (prank == i) + std::cerr << prank << " (" << coords[0] << ", " << coords[1] << ")" + << " - " << local_sizes[0] << "x" << local_sizes[1] << "(" << w + << ") + " << offset[0] << "x" << offset[1] << "("<< 3*offset[1]<<") ++ [" + << global_sizes[0] << "x" << global_sizes[1] << "(" << global_w + << ")] " << padding << "\n"; + MPI_Barrier(m_communicator); + } +#endif + + global_sizes[1] = global_w; + local_sizes[1] = w; + offset[1] *= 3; + + MPI_Type_create_subarray(global_sizes.size(), global_sizes.data(), + local_sizes.data(), offset.data(), MPI_ORDER_C, + MPI_CHAR, &subarray_t); + MPI_Type_commit(&subarray_t); + + // opening a file in write and create mode + MPI_File_open(MPI_COMM_WORLD, sfilename.str().c_str(), + MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh); + // defining the size of the file + MPI_File_set_size(fh, filesize); + + // rank 0 writes the header + if (prank == 0) { + MPI_File_write_at(fh, 0, bmpfileheader.data(), 14, MPI_CHAR, &status); + MPI_File_write_at(fh, 14, bmpinfoheader.data(), 40, MPI_CHAR, &status); + } + + MPI_File_set_view(fh, 54, MPI_CHAR, subarray_t, "native", MPI_INFO_NULL); + MPI_File_write(fh, img.data(), img.size(), MPI_CHAR, &status); + MPI_File_close(&fh); +} diff --git a/poisson/.solutions-cartesian-alltoall/dumpers.hh b/poisson/.solutions-cartesian-alltoall/dumpers.hh new file mode 100644 index 0000000..dc9b013 --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/dumpers.hh @@ -0,0 +1,58 @@ +#if defined(_MPI) +#include +#endif + +#ifndef DUMPERS_HH +#define DUMPERS_HH + +/* -------------------------------------------------------------------------- */ +class Grid; + +/* -------------------------------------------------------------------------- */ +class Dumper { +public: +#if defined(_MPI) + explicit Dumper(MPI_Comm communicator) + : m_min(-1.), m_max(1.), m_communicator(communicator) {} +#else + Dumper() : m_min(-1.), m_max(1.) {} +#endif + virtual void dump(const Grid & m_grid, int step) = 0; + + void set_min(float min); + void set_max(float max); + + float min() const; + float max() const; + +protected: + float m_min, m_max; +#if defined(_MPI) + MPI_Comm m_communicator; +#endif +}; + +/* -------------------------------------------------------------------------- */ +class DumperASCII : public Dumper { +public: +#if defined(_MPI) + explicit DumperASCII(MPI_Comm communicator) : Dumper(communicator) {} +#else + DumperASCII() : Dumper() {} +#endif + + virtual void dump(const Grid & grid, int step); +}; + +/* -------------------------------------------------------------------------- */ +class DumperBinary : public Dumper { +public: +#if defined(_MPI) + explicit DumperBinary(MPI_Comm communicator) : Dumper(communicator) {} +#else + DumperBinary() : Dumper() {} +#endif + virtual void dump(const Grid & grid, int step); +}; + +#endif /* DUMPERS_HH */ diff --git a/poisson/.solutions-cartesian-alltoall/grid.cc b/poisson/.solutions-cartesian-alltoall/grid.cc new file mode 100644 index 0000000..1a46b04 --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/grid.cc @@ -0,0 +1,29 @@ +/* -------------------------------------------------------------------------- */ +#include "grid.hh" +/* -------------------------------------------------------------------------- */ +#include +#include +#if defined(_DEBUG_MPI) +#include +#endif +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +Grid::Grid(int m, int n) : m_m(m), m_n(n), m_storage(m * n) { clear(); } + +/* -------------------------------------------------------------------------- */ +Grid::~Grid() = default; + +/* -------------------------------------------------------------------------- */ +void Grid::clear() { std::fill(m_storage.begin(), m_storage.end(), 0.); } + +/* -------------------------------------------------------------------------- */ +void Grid::resize(int m, int n) { + m_m = m; + m_n = n; + m_storage.resize(m * n); +} + +/* -------------------------------------------------------------------------- */ +int Grid::m() const { return m_m; } +int Grid::n() const { return m_n; } diff --git a/poisson/.solutions-cartesian-alltoall/grid.hh b/poisson/.solutions-cartesian-alltoall/grid.hh new file mode 100644 index 0000000..8c5a865 --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/grid.hh @@ -0,0 +1,53 @@ +#ifndef GRID_HH +#define GRID_HH + +/* -------------------------------------------------------------------------- */ +#include +#include +#include +#include +/* -------------------------------------------------------------------------- */ +namespace { +struct DEBUG { +#if defined(DEBUG) + static void print(const std::string & func) { + std::cerr << func << "\n"; + } +#else + static void print(const std::string & ) { + + } +#endif +}; +} // namespace + +/* -------------------------------------------------------------------------- */ + +class Grid { +public: + Grid(int m = 0, int n = 0); + ~Grid(); + + /// access the value [i][j] of the grid + inline float & operator()(int i, int j) { return m_storage[i * m_n + j]; } + inline const float & operator()(int i, int j) const { + return m_storage[i * m_n + j]; + } + + void resize(int m, int n); + + /// set the grid to 0 + void clear(); + + int m() const; + int n() const; + +private: + int m_m, m_n; + std::vector m_storage; + + /// request for asynchronous communications + std::array m_requests; +}; + +#endif /* GRID_HH */ diff --git a/poisson/.solutions-cartesian-alltoall/optim.sub b/poisson/.solutions-cartesian-alltoall/optim.sub new file mode 100644 index 0000000..3e4440e --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/optim.sub @@ -0,0 +1,35 @@ +#!/bin/bash + +##SBATCH --reservation=phpc2017 +##SBATCH --account=phpc2017 +#SBATCH -N 1 +#SBATCH -n 1 + +OPTIMS="-O0 -O1 -O2 -O3" +N=512 + + +module load gcc + +echo gcc > gcc.out +for _opt in ${OPTIMS} '-O3 -ftree-vectorize' +do + make clean + make CXXFLAGS="-std=c++11 ${_opt}" CXX=g++ + ./poisson $N| cut -f4 -d " " >> gcc.out +done + + +module load intel + +echo icpc > intel.out +for _opt in ${OPTIMS} '-O3 -xHOST' +do + make clean + make CXXFLAGS="-std=c++11 ${_opt}" CXX=icpc + ./poisson $N | cut -f4 -d " " >> intel.out +done + +echo " ${OPTIMS} specific" | tr " " "\n" > header + +paste header gcc.out intel.out > compilers.out diff --git a/poisson/.solutions-cartesian-alltoall/poisson.cc b/poisson/.solutions-cartesian-alltoall/poisson.cc new file mode 100644 index 0000000..95f843b --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/poisson.cc @@ -0,0 +1,73 @@ +/* -------------------------------------------------------------------------- */ +#include "simulation.hh" +/* -------------------------------------------------------------------------- */ +#include +#include +#include +#include +/* -------------------------------------------------------------------------- */ +#include +/* -------------------------------------------------------------------------- */ + +#define EPSILON 0.05 + +typedef std::chrono::high_resolution_clock clk; +typedef std::chrono::duration second; + +static void usage(const std::string & prog_name) { + std::cerr << prog_name << " " << std::endl; + exit(0); +} + +int main(int argc, char * argv[]) { + int prank{0}, psize{1}, nthreads{1}; + MPI_Init(NULL, NULL); + // assert(provided == MPI_THREAD_MULTIPLE); + + MPI_Comm_rank(MPI_COMM_WORLD, &prank); + MPI_Comm_size(MPI_COMM_WORLD, &psize); + + if (argc != 2) + usage(argv[0]); + + std::stringstream args(argv[1]); + int N; + args >> N; + + if (args.fail()) + usage(argv[0]); + + std::unique_ptr simu; + + simu = std::make_unique(N, N, MPI_COMM_WORLD); + + simu->set_initial_conditions(); + simu->set_epsilon(EPSILON); + + float l2; + int k; + + auto start = clk::now(); + std::tie(l2, k) = simu->compute(); + auto end = clk::now(); + + second time = end - start; + + auto min = simu->min(); + auto max = simu->max(); + + if (prank == 0) + std::cout << "MPI: " << psize << " (" << simu->dims(0) << ", " << simu->dims(1) << ")\n" + << "OMP: " << nthreads << "\n" + << "Grid: " << N << "x" << N << "\n" + << "Bound: " << "[ " << min << " : " << max << " ]" << "\n" + << "Iter: " << k << "\n" + << "Norm: " << std::scientific << l2 << "\n" + << "Time: " << time.count() << "s" << "\n" + << "Flops: " << simu->complexity()*k/time.count()/1e9 << " GFlops" << std::endl; + + simu.release(); // To force destruction of MPI related stuff + MPI_Finalize(); + + return 0; +} diff --git a/poisson/.solutions-cartesian-alltoall/poisson.sub b/poisson/.solutions-cartesian-alltoall/poisson.sub new file mode 100644 index 0000000..aa43198 --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/poisson.sub @@ -0,0 +1,31 @@ +#!/bin/bash + +##SBATCH --reservation=phpc2017 +##SBATCH --account=phpc2017 +#SBATCH -N 1 +#SBATCH -n 1 + +module load gcc + +rm -f complexity.dat + +for i in 128 256 512 1024 +do + ./poisson $i >> complexity.dat +done + +slmodules -r deprecated +module load gnuplot + +gnuplot < +#include +#include +#include +#include + +/* -------------------------------------------------------------------------- */ +Simulation::Simulation(int m, int n, MPI_Comm communicator) { + + // retrieving the number of proc and the rank in the proc pool + MPI_Comm_size(communicator, &m_psize); + + m_dims = {0, 0}; + std::array periods{0, 0}; + MPI_Dims_create(m_psize, m_dims.size(), m_dims.data()); + + assert(m_dims[0] * m_dims[1] == m_psize && "MPI_Dims_create is fucked up"); + + MPI_Cart_create(communicator, m_dims.size(), m_dims.data(), periods.data(), 0, + &m_communicator); + + MPI_Comm_rank(m_communicator, &m_prank); + + MPI_Cart_coords(m_communicator, m_prank, m_coords.size(), m_coords.data()); + + m_dumper = std::make_unique(m_communicator); + + m_global_size = {m, n}; + + std::array ghosts{0, 0}; + + for (std::size_t i = 0; i < m_local_size.size(); ++i) { + // computation of the local size of the grid the remainder is spread equally + // on the first processors + m_local_size[i] = m_global_size[i] / m_dims[i] + + (m_coords[i] < m_global_size[i] % m_dims[i] ? 1 : 0); + + // adding the ghosts lines if needed + if (m_dims[i] > 1) + ghosts[i] = (m_coords[i] == 0 || m_coords[i] == m_dims[i] - 1) ? 1 : 2; + + m_local_size[i] += ghosts[i]; + + // computing the offsets of the local grid in the global one + m_offset[i] = (m_global_size[i] / m_dims[i]) * m_coords[i] + + (m_coords[i] < m_global_size[i] % m_dims[i] + ? m_coords[i] + : m_global_size[i] % m_dims[i]); + + m_h[i] = 1. / m_global_size[i]; + } + +#if defined(_DEBUG_MPI) + for (int i = 0; i < m_psize; ++i) { + if (m_prank == i) + std::cerr << m_prank << " (" << m_coords[0] << ", " << m_coords[1] << ")" + << " - " << m_local_size[0] << "x" << m_local_size[1] << " + " + << m_offset[0] << "x" << m_offset[1] << " ++ [" << ghosts[0] + << "x" << ghosts[1] << "]\n"; + MPI_Barrier(m_communicator); + } +#endif + + // resizing the different grids + m_grids.resize(m_local_size[0], m_local_size[1]); + m_f.resize(m_local_size[0], m_local_size[1]); + + MPI_Type_vector(1, m_local_size[1] - ghosts[1], 1, MPI_FLOAT, &m_line_t); + MPI_Type_vector(m_local_size[0] - ghosts[0], 1, m_local_size[1], MPI_FLOAT, &m_column_t); + + MPI_Type_commit(&m_column_t); + MPI_Type_commit(&m_line_t); + + auto & uo = m_grids.old(); + + auto displ = [&](auto && d) { + MPI_Aint addr0, addr1; + MPI_Get_address(&uo(0, 0), &addr0); + MPI_Get_address(&d, &addr1); + return MPI_Aint_diff(addr1, addr0); + }; + + m_sdispls = { + displ(uo(1, 1)), + displ(uo(m_local_size[0] - 2, 1)), + displ(uo(1, 1)), + displ(uo(1, m_local_size[1] - 2)) + }; + + m_rdispls = { + displ(uo(0, 1)), + displ(uo(m_local_size[0] - 1, 1)), + displ(uo(1, 0)), + displ(uo(1, m_local_size[1] - 1)), + }; + + m_types = {m_line_t, m_line_t, m_column_t, m_column_t}; + m_counts = {1, 1, 1, 1}; +} + +/* -------------------------------------------------------------------------- */ +void Simulation::set_initial_conditions() { + std::array start = {0, 0}; + std::array end = m_local_size; + + for (int i = 0; i < 2; ++i) { + start[i] += (m_coords[i] == 0 ? 0 : 1); + end[i] -= (m_coords[i] == m_dims[i] - 1 ? 0 : 1); + } + + float min = 0., max = 0.; + float a = 5.5 * M_PI; + for (int i = start[0]; i < end[0]; i++) { + for (int j = start[1]; j < end[1]; j++) { + m_f(i, j) = + -2. * a * a * std::sin(a * (m_offset[0] + i - start[0]) * m_h[0]) * + std::sin(a * (m_offset[1] + j - start[1]) * m_h[1]) * m_h[0] * m_h[1]; + + min = std::min(min, m_f(i, j)); + max = std::max(max, m_f(i, j)); + } + } + + m_dumper->set_min(min); + m_dumper->set_max(max); + m_dumper->dump(m_f, 1); +} + +/* -------------------------------------------------------------------------- */ +std::tuple Simulation::compute() { + int s = 0; + + // std::cout << m_h_m << " " << m_h_n << " " << m_epsilon << std::endl; + float l2 = 0.; + + do { + l2 = compute_step(); + DEBUG::print(std::to_string(m_prank) + "do_step: " + std::to_string(l2)); + + m_grids.swap(); + + MPI_Allreduce(MPI_IN_PLACE, &l2, 1, MPI_FLOAT, MPI_SUM, m_communicator); + DEBUG::print(std::to_string(m_prank) + " MPI_Allreduce: " + std::to_string(l2) + " / " + std::to_string(m_epsilon)); + ++s; + + } while (l2 > m_epsilon); + + m_dumper->set_min(min()); + m_dumper->set_max(max()); + m_dumper->dump(m_grids.old(), 0); + + return std::make_tuple(std::sqrt(l2), s); +} + +/* -------------------------------------------------------------------------- */ +void Simulation::set_epsilon(float epsilon) { m_epsilon = epsilon * epsilon; } + +/* -------------------------------------------------------------------------- */ +float Simulation::epsilon() const { return std::sqrt(m_epsilon); } + +/* -------------------------------------------------------------------------- */ +inline float Simulation::compute_row(int i) { + float l2 = 0.; + + Grid & u = m_grids.current(); + Grid & uo = m_grids.old(); + + for (int j = 1; j < m_local_size[1] - 1; j++) { + // computation of the new step + u(i, j) = (uo(i - 1, j) + uo(i + 1, j) + uo(i, j - 1) + uo(i, j + 1) - + m_f(i, j)) / + 4.; + + // L2 norm + l2 += (uo(i, j) - u(i, j)) * (uo(i, j) - u(i, j)); + } + + return l2; +} + +/* -------------------------------------------------------------------------- */ +float Simulation::compute_step() { + float l2 = 0.; + + auto & uo = m_grids.old(); + auto & u = m_grids.current(); + + auto _compute = [&](auto && i, auto && j) { + u(i, j) = (uo(i - 1, j) + uo(i + 1, j) + uo(i, j - 1) + uo(i, j + 1) - + m_f(i, j)) / + 4.; + + l2 += (uo(i, j) - u(i, j)) * (uo(i, j) - u(i, j)); + }; + + MPI_Neighbor_alltoallw( + &uo(0, 0), m_counts.data(), m_sdispls.data(), m_types.data(), + &uo(0, 0), m_counts.data(), m_rdispls.data(), m_types.data(), + m_communicator); + + // computing the inner rows that do not depend on the ghosts + for (int i = 2; i < m_local_size[0] - 2; i++) { + for (int j = 2; j < m_local_size[1] - 2; j++) { + _compute(i, j); + } + } + + for (int i = 2; i < m_local_size[1] - 2; i++) { + _compute(1, i); + } + + for (int i = 2; i < m_local_size[1] - 2; i++) { + _compute(m_local_size[0] - 2, i); + } + + for (int i = 2; i < m_local_size[0] - 2; i++) { + _compute(i, 1); + } + + for (int i = 2; i < m_local_size[0] - 2; i++) { + _compute(i, m_local_size[1] - 2); + } + + return l2; +} + +namespace { +template +auto accumulate_grid(const G & grid, float init, Func && func) { + auto m = grid.m(); + auto n = grid.n(); + + for (decltype(m) i = 1; i < m - 1; i++) { + for (decltype(n) j = 1; j < n - 1; j++) { + init = func(init, grid(i, j)); + } + } + return init; +} +} // namespace + +/* -------------------------------------------------------------------------- */ +float Simulation::min() const { + const auto & u0 = m_grids.old(); + + auto tmp = accumulate_grid( + u0, u0(1, 1), [](auto && a, auto && b) { return std::min(a, b); }); + + MPI_Allreduce(MPI_IN_PLACE, &tmp, 1, MPI_FLOAT, MPI_MIN, m_communicator); + + return tmp; +} + +/* -------------------------------------------------------------------------- */ +float Simulation::max() const { + const auto & u0 = m_grids.old(); + + auto tmp = accumulate_grid( + u0, u0(1, 1), [](auto && a, auto && b) { return std::max(a, b); }); + + MPI_Allreduce(MPI_IN_PLACE, &tmp, 1, MPI_FLOAT, MPI_MAX, m_communicator); + + return tmp; +} + +/* -------------------------------------------------------------------------- */ diff --git a/poisson/.solutions-cartesian-alltoall/simulation.hh b/poisson/.solutions-cartesian-alltoall/simulation.hh new file mode 100644 index 0000000..2baf0c7 --- /dev/null +++ b/poisson/.solutions-cartesian-alltoall/simulation.hh @@ -0,0 +1,91 @@ +#ifndef SIMULATION_HH +#define SIMULATION_HH + +/* -------------------------------------------------------------------------- */ +#include "double_buffer.hh" +#include "dumpers.hh" +#include "grid.hh" +/* -------------------------------------------------------------------------- */ +#include +#include +#include +#include +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +class Simulation { +public: + Simulation(int m, int n, MPI_Comm communicator); + + /// set the initial conditions, Dirichlet and source term + virtual void set_initial_conditions(); + + /// perform the simulation + std::tuple compute(); + + /// access the precision + void set_epsilon(float epsilon); + float epsilon() const; + + std::size_t complexity() { return 8 * m_global_size[0] * m_global_size[1]; } + + float min() const; + float max() const; + + float dims(std::size_t i) const { return m_dims[i]; } + +protected: + /// compute one step and return an error + virtual float compute_step(); + + /// compute one row + float compute_row(int i); + +private: + /// Global problem size + std::array m_global_size; + + /// Local problem size + std::array m_local_size; + + /// local offset + std::array m_offset; + + /// Cartesian dimensions + std::array m_dims; + + /// Cartesian coordinates + std::array m_coords; + + /// proc rank + int m_prank{0}; + + /// communicator size + int m_psize{1}; + + /// Precision to achieve + float m_epsilon; + + /// grid spacing + std::array m_h; + + /// Grids storage + DoubleBuffer m_grids; + + /// source term + Grid m_f; + + /// Dumper to use for outputs + std::unique_ptr m_dumper; + + /// communicator + MPI_Comm m_communicator; + + std::array m_sdispls, m_rdispls; + std::array m_types; + std::array m_counts; + + MPI_Datatype m_line_t, m_column_t; +}; + +#endif /* SIMULATION_HH */