diff --git a/poisson/.solutions-cartesian-alltoall/AUTHORS b/poisson/.solutions-cartesian-alltoall/AUTHORS
new file mode 100644
index 0000000..9253635
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/AUTHORS
@@ -0,0 +1,4 @@
+Authors:
+ - Nicolas Richart <nicolas.richart@epfl.ch>
+ - Vincent Keller <vincent.keller@epfl.ch>
+ . Vittoria Rezzonico <vittoria.rezzonico@epfl.ch>
diff --git a/poisson/.solutions-cartesian-alltoall/COPYING b/poisson/.solutions-cartesian-alltoall/COPYING
new file mode 100644
index 0000000..e551330
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/COPYING
@@ -0,0 +1,24 @@
+Copyright (c) 2017, Ecole Polytechnique Federal de Lausanne - Scientific IT and
+Application Support (SCITAS) All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the <organization> nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/poisson/.solutions-cartesian-alltoall/Makefile b/poisson/.solutions-cartesian-alltoall/Makefile
new file mode 100644
index 0000000..00fdc48
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/Makefile
@@ -0,0 +1,21 @@
+CXX=mpic++
+#CXX=g++
+LD=${CXX}
+#VERIFY=-fsanitize=thread
+VERIFY= -D_DEBUG -D_DEBUG_MPI
+OPENMP=-fopenmp
+NOOPENMP=-fno-openmp -Wno-unknown-pragmas
+#OPTIM+=-O3 -D_MPI $(NOOPENMP)
+OPTIM+=-O3 -D_MPI $(NOOPENMP) -march=native -g
+CXXFLAGS+=-Wall -Wextra -Werror -pedantic -std=c++14 -g $(OPTIM) $(VERIFY)
+LDFLAGS+=-lm $(CXXFLAGS)
+
+OBJS=poisson.o grid.o double_buffer.o simulation.o  dumpers.o
+
+all: poisson
+
+poisson: $(OBJS)
+	$(LD) -o $@ $(OBJS) $(LDFLAGS)
+
+clean:
+	rm -f hello poisson *.o *~
diff --git a/poisson/.solutions-cartesian-alltoall/double_buffer.cc b/poisson/.solutions-cartesian-alltoall/double_buffer.cc
new file mode 100644
index 0000000..978eda5
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/double_buffer.cc
@@ -0,0 +1,31 @@
+/* -------------------------------------------------------------------------- */
+#include "double_buffer.hh"
+#include "grid.hh"
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+DoubleBuffer::DoubleBuffer(int m, int n)
+    : m_current(new Grid(m, n)), m_old(new Grid(m, n)) {}
+
+/* -------------------------------------------------------------------------- */
+Grid & DoubleBuffer::current() { return *m_current; }
+
+/* -------------------------------------------------------------------------- */
+Grid & DoubleBuffer::old() { return *m_old; }
+
+/* -------------------------------------------------------------------------- */
+const Grid & DoubleBuffer::current() const { return *m_current; }
+
+/* -------------------------------------------------------------------------- */
+const Grid & DoubleBuffer::old() const { return *m_old; }
+
+/* -------------------------------------------------------------------------- */
+void DoubleBuffer::swap() {
+  m_current.swap(m_old);
+}
+
+/* -------------------------------------------------------------------------- */
+void DoubleBuffer::resize(int m, int n) {
+  m_current->resize(m, n);
+  m_old->resize(m, n);
+}
diff --git a/poisson/.solutions-cartesian-alltoall/double_buffer.hh b/poisson/.solutions-cartesian-alltoall/double_buffer.hh
new file mode 100644
index 0000000..1d21b5c
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/double_buffer.hh
@@ -0,0 +1,28 @@
+#ifndef DOUBLE_BUFFER
+#define DOUBLE_BUFFER
+
+/* -------------------------------------------------------------------------- */
+#include <memory>
+/* -------------------------------------------------------------------------- */
+#include "grid.hh"
+/* -------------------------------------------------------------------------- */
+
+class DoubleBuffer {
+public:
+  DoubleBuffer(int m = 0 , int n = 0);
+
+  void resize(int m, int n);
+  Grid & current();
+  Grid & old();
+
+  const Grid & current() const;
+  const Grid & old() const;
+
+  void swap();
+private:
+
+  std::unique_ptr<Grid> m_current;
+  std::unique_ptr<Grid> m_old;
+};
+
+#endif /* DOUBLE_BUFFER */
diff --git a/poisson/.solutions-cartesian-alltoall/dumpers.cc b/poisson/.solutions-cartesian-alltoall/dumpers.cc
new file mode 100644
index 0000000..c626753
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/dumpers.cc
@@ -0,0 +1,205 @@
+/* -------------------------------------------------------------------------- */
+#include "dumpers.hh"
+#include "grid.hh"
+/* -------------------------------------------------------------------------- */
+#include <array>
+#include <iostream>
+#include <fstream>
+#include <iomanip>
+#include <sstream>
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+void Dumper::set_min(float min) { m_min = min; }
+
+void Dumper::set_max(float max) { m_max = max; }
+
+float Dumper::min() const { return m_min; }
+
+float Dumper::max() const { return m_max; }
+
+/* -------------------------------------------------------------------------- */
+void DumperASCII::dump(const Grid & grid, int step) {
+  std::ofstream fout;
+  std::stringstream sfilename;
+
+  sfilename << "out_" << std::setfill('0') << std::setw(5) << step << ".pgm";
+  fout.open(sfilename.str());
+
+  int m = grid.m();
+  int n = grid.n();
+
+  fout << "P2" << std::endl << "# CREATOR: Poisson program" << std::endl;
+  fout << m << " " << n << std::endl;
+  fout << 255 << std::endl;
+
+  for (int i = 0; i < m; i++) {
+    for (int j = 0; j < n; j++) {
+      int v = 255. * (grid(i, j) - m_min) / (m_max - m_min);
+      v = std::min(v, 255);
+      fout << v << std::endl;
+    }
+  }
+}
+
+/* -------------------------------------------------------------------------- */
+void DumperBinary::dump(const Grid & grid, int step) {
+  std::stringstream sfilename;
+
+  std::array<int, 2> coords{0, 0}, dims{0, 0};
+
+  int prank{0}, psize{1};
+  MPI_Comm_rank(m_communicator, &prank);
+  MPI_Comm_size(m_communicator, &psize);
+
+  std::array<int, 2> periods{0, 0};
+  MPI_Cart_get(m_communicator, dims.size(), dims.data(), periods.data(), coords.data());
+
+  sfilename << "out_" << std::setfill('0') << std::setw(5) << step << ".bmp";
+
+  int h = grid.m();
+  int w = grid.n();
+  int global_w = w;
+
+  std::array<int, 2> starts = {0, 0};
+  std::array<int, 2> local_sizes = {h, w};
+  std::array<int, 2> global_sizes = local_sizes;
+
+  // removing the ghosts
+  if(prank == 0) std::cout << dims[0] << " " << dims[1] << "\n";
+
+  for (std::size_t i = 0; i < local_sizes.size(); ++i) {
+    if (dims[i] > 1) {
+      local_sizes[i] -= (coords[i] == 0 || coords[i] == dims[i] - 1 ? 1 : 2);
+      starts[i] += (coords[i] == 0 ? 0 : 1);
+    }
+  }
+
+  std::array<MPI_Comm, 2> comms;
+
+  // determining the local offset
+  std::array<int, 2> offset = {0, 0};
+
+  std::vector<std::vector<int>> line_col_size(2);
+
+  for (size_t i = 0; i < line_col_size.size(); ++i) {
+    std::array<int, 2> remain_dims = {false, false};
+    remain_dims[i] = true;
+    MPI_Cart_sub(m_communicator, remain_dims.data(), &(comms[i]));
+
+    line_col_size[i].resize(dims[i]);
+    MPI_Allgather(&local_sizes[i], 1, MPI_INT, line_col_size[i].data(), 1,
+                  MPI_INT, comms[i]);
+
+    global_sizes[i] = 0;
+    for (int j = 0; j < dims[i]; ++j) {
+      global_sizes[i] += line_col_size[i][j];
+    }
+  }
+
+  //for (int i = dims[0] - 1; i > coords[0]; --i) {
+  for (int i = 0; i < coords[0]; ++i) {
+    offset[0] += line_col_size[0][i];
+  }
+
+  for (int i = 0; i < coords[1]; ++i) {
+    offset[1] += line_col_size[1][i];
+  }
+
+  global_w = 4 * ((24 * global_sizes[1] + 31) / 32);
+
+  // if the file width (3*w) is not a multiple of 4 adds enough bytes to make it
+  // a multiple of 4
+  int padding = global_w - 3 * global_sizes[1];
+
+  w = 3 * local_sizes[1];
+
+  if (coords[1] == dims[1] - 1) {
+    w += padding;
+  } else {
+    padding = 0;
+  }
+
+  int filesize = 54 + global_sizes[0] * global_w;
+
+  std::vector<char> img(local_sizes[0] * w);
+  std::fill(img.begin(), img.end(), 0);
+
+  for (int i = 0; i < local_sizes[0]; i++) {
+    for (int j = 0; j < local_sizes[1]; j++) {
+      float v =
+          ((grid(i + starts[0], j + starts[1]) - m_min) / (m_max - m_min));
+
+      float r = v * 255; // Red channel
+      float g = v * 255; // Green channel
+      float b = prank * 255 / psize; // Red channel
+
+      r = std::min(r, 255.f);
+      g = std::min(g, 255.f);
+      b = std::min(b, 255.f);
+
+      img[w * i + 3 * j + 0] = r;
+      img[w * i + 3 * j + 1] = g;
+      img[w * i + 3 * j + 2] = b;
+    }
+  }
+
+  auto write_bytes = [](auto && array, auto && value) {
+    array[0] = value;
+    array[1] = value >> 8;
+    array[2] = value >> 16;
+    array[3] = value >> 24;
+  };
+
+  std::array<char, 14> bmpfileheader = {'B', 'M', 0, 0,  0, 0, 0,
+                                        0,   0,   0, 54, 0, 0, 0};
+  std::array<char, 40> bmpinfoheader = {40, 0, 0, 0, 0, 0, 0,  0,
+                                        0,  0, 0, 0, 1, 0, 24, 0};
+
+  write_bytes(&bmpfileheader[2], filesize);
+
+  write_bytes(&bmpinfoheader[4], global_sizes[1]);
+  write_bytes(&bmpinfoheader[8], global_sizes[0]);
+  write_bytes(&bmpinfoheader[20], filesize - 54);
+
+  MPI_File fh;
+  MPI_Status status;
+  MPI_Datatype subarray_t;
+
+#if defined(_DEBUG_MPI)
+  for (int i = 0; i < psize; ++i) {
+    if (prank == i)
+      std::cerr << prank << " (" << coords[0] << ", " << coords[1] << ")"
+                << " - " << local_sizes[0] << "x" << local_sizes[1] << "(" << w
+                << ") + " << offset[0] << "x" << offset[1] << "("<< 3*offset[1]<<") ++ ["
+                << global_sizes[0] << "x" << global_sizes[1] << "(" << global_w
+                << ")] " << padding << "\n";
+    MPI_Barrier(m_communicator);
+  }
+#endif
+
+  global_sizes[1] = global_w;
+  local_sizes[1] = w;
+  offset[1] *= 3;
+
+  MPI_Type_create_subarray(global_sizes.size(), global_sizes.data(),
+                           local_sizes.data(), offset.data(), MPI_ORDER_C,
+                           MPI_CHAR, &subarray_t);
+  MPI_Type_commit(&subarray_t);
+
+  // opening a file in write and create mode
+  MPI_File_open(MPI_COMM_WORLD, sfilename.str().c_str(),
+                MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
+  // defining the size of the file
+  MPI_File_set_size(fh, filesize);
+
+  // rank 0 writes the header
+  if (prank == 0) {
+    MPI_File_write_at(fh, 0, bmpfileheader.data(), 14, MPI_CHAR, &status);
+    MPI_File_write_at(fh, 14, bmpinfoheader.data(), 40, MPI_CHAR, &status);
+  }
+
+  MPI_File_set_view(fh, 54, MPI_CHAR, subarray_t, "native", MPI_INFO_NULL);
+  MPI_File_write(fh, img.data(), img.size(), MPI_CHAR, &status);
+  MPI_File_close(&fh);
+}
diff --git a/poisson/.solutions-cartesian-alltoall/dumpers.hh b/poisson/.solutions-cartesian-alltoall/dumpers.hh
new file mode 100644
index 0000000..dc9b013
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/dumpers.hh
@@ -0,0 +1,58 @@
+#if defined(_MPI)
+#include <mpi.h>
+#endif
+
+#ifndef DUMPERS_HH
+#define DUMPERS_HH
+
+/* -------------------------------------------------------------------------- */
+class Grid;
+
+/* -------------------------------------------------------------------------- */
+class Dumper {
+public:
+#if defined(_MPI)
+  explicit Dumper(MPI_Comm communicator)
+      : m_min(-1.), m_max(1.), m_communicator(communicator) {}
+#else
+  Dumper() : m_min(-1.), m_max(1.) {}
+#endif
+  virtual void dump(const Grid & m_grid, int step) = 0;
+
+  void set_min(float min);
+  void set_max(float max);
+
+  float min() const;
+  float max() const;
+
+protected:
+  float m_min, m_max;
+#if defined(_MPI)
+  MPI_Comm m_communicator;
+#endif
+};
+
+/* -------------------------------------------------------------------------- */
+class DumperASCII : public Dumper {
+public:
+#if defined(_MPI)
+  explicit DumperASCII(MPI_Comm communicator) : Dumper(communicator) {}
+#else
+  DumperASCII() : Dumper() {}
+#endif
+
+  virtual void dump(const Grid & grid, int step);
+};
+
+/* -------------------------------------------------------------------------- */
+class DumperBinary : public Dumper {
+public:
+#if defined(_MPI)
+  explicit DumperBinary(MPI_Comm communicator) : Dumper(communicator) {}
+#else
+  DumperBinary() : Dumper() {}
+#endif
+  virtual void dump(const Grid & grid, int step);
+};
+
+#endif /* DUMPERS_HH */
diff --git a/poisson/.solutions-cartesian-alltoall/grid.cc b/poisson/.solutions-cartesian-alltoall/grid.cc
new file mode 100644
index 0000000..1a46b04
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/grid.cc
@@ -0,0 +1,29 @@
+/* -------------------------------------------------------------------------- */
+#include "grid.hh"
+/* -------------------------------------------------------------------------- */
+#include <algorithm>
+#include <iostream>
+#if defined(_DEBUG_MPI)
+#include <iomanip>
+#endif
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+Grid::Grid(int m, int n) : m_m(m), m_n(n), m_storage(m * n) { clear(); }
+
+/* -------------------------------------------------------------------------- */
+Grid::~Grid() = default;
+
+/* -------------------------------------------------------------------------- */
+void Grid::clear() { std::fill(m_storage.begin(), m_storage.end(), 0.); }
+
+/* -------------------------------------------------------------------------- */
+void Grid::resize(int m, int n) {
+  m_m = m;
+  m_n = n;
+  m_storage.resize(m * n);
+}
+
+/* -------------------------------------------------------------------------- */
+int Grid::m() const { return m_m; }
+int Grid::n() const { return m_n; }
diff --git a/poisson/.solutions-cartesian-alltoall/grid.hh b/poisson/.solutions-cartesian-alltoall/grid.hh
new file mode 100644
index 0000000..8c5a865
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/grid.hh
@@ -0,0 +1,53 @@
+#ifndef GRID_HH
+#define GRID_HH
+
+/* -------------------------------------------------------------------------- */
+#include <array>
+#include <vector>
+#include <mpi.h>
+#include <iostream>
+/* -------------------------------------------------------------------------- */
+namespace {
+struct DEBUG {
+#if defined(DEBUG)
+  static void print(const std::string & func) {
+    std::cerr << func << "\n";
+  }
+#else
+  static void print(const std::string & ) {
+  
+  }
+#endif
+};
+} // namespace
+
+/* -------------------------------------------------------------------------- */
+
+class Grid {
+public:
+  Grid(int m = 0, int n = 0);
+  ~Grid();
+
+  /// access the value [i][j] of the grid
+  inline float & operator()(int i, int j) { return m_storage[i * m_n + j]; }
+  inline const float & operator()(int i, int j) const {
+    return m_storage[i * m_n + j];
+  }
+
+  void resize(int m, int n);
+
+  /// set the grid to 0
+  void clear();
+
+  int m() const;
+  int n() const;
+
+private:
+  int m_m, m_n;
+  std::vector<float> m_storage;
+
+  /// request for asynchronous communications
+  std::array<MPI_Request, 8> m_requests;
+};
+
+#endif /* GRID_HH */
diff --git a/poisson/.solutions-cartesian-alltoall/optim.sub b/poisson/.solutions-cartesian-alltoall/optim.sub
new file mode 100644
index 0000000..3e4440e
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/optim.sub
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+##SBATCH --reservation=phpc2017
+##SBATCH --account=phpc2017
+#SBATCH -N 1
+#SBATCH -n 1
+
+OPTIMS="-O0 -O1 -O2 -O3"
+N=512
+
+
+module load gcc
+
+echo gcc > gcc.out
+for _opt in ${OPTIMS} '-O3 -ftree-vectorize'
+do
+  make clean
+  make CXXFLAGS="-std=c++11 ${_opt}" CXX=g++
+  ./poisson $N| cut -f4 -d " " >> gcc.out
+done
+
+
+module load intel
+
+echo icpc > intel.out
+for _opt in ${OPTIMS} '-O3 -xHOST'
+do
+  make clean
+  make CXXFLAGS="-std=c++11 ${_opt}" CXX=icpc
+  ./poisson $N | cut -f4 -d " " >> intel.out
+done
+
+echo " ${OPTIMS} specific" | tr " " "\n" > header
+
+paste header gcc.out intel.out > compilers.out
diff --git a/poisson/.solutions-cartesian-alltoall/poisson.cc b/poisson/.solutions-cartesian-alltoall/poisson.cc
new file mode 100644
index 0000000..95f843b
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/poisson.cc
@@ -0,0 +1,73 @@
+/* -------------------------------------------------------------------------- */
+#include "simulation.hh"
+/* -------------------------------------------------------------------------- */
+#include <chrono>
+#include <iostream>
+#include <sstream>
+#include <tuple>
+/* -------------------------------------------------------------------------- */
+#include <mpi.h>
+/* -------------------------------------------------------------------------- */
+
+#define EPSILON 0.05
+
+typedef std::chrono::high_resolution_clock clk;
+typedef std::chrono::duration<double> second;
+
+static void usage(const std::string & prog_name) {
+  std::cerr << prog_name << " <grid_size>" << std::endl;
+  exit(0);
+}
+
+int main(int argc, char * argv[]) {
+  int prank{0}, psize{1}, nthreads{1};
+  MPI_Init(NULL, NULL);
+  // assert(provided == MPI_THREAD_MULTIPLE);
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &prank);
+  MPI_Comm_size(MPI_COMM_WORLD, &psize);
+
+  if (argc != 2)
+    usage(argv[0]);
+
+  std::stringstream args(argv[1]);
+  int N;
+  args >> N;
+
+  if (args.fail())
+    usage(argv[0]);
+
+  std::unique_ptr<Simulation> simu;
+
+  simu = std::make_unique<Simulation>(N, N, MPI_COMM_WORLD);
+
+  simu->set_initial_conditions();
+  simu->set_epsilon(EPSILON);
+
+  float l2;
+  int k;
+
+  auto start = clk::now();
+  std::tie(l2, k) = simu->compute();
+  auto end = clk::now();
+
+  second time = end - start;
+
+  auto min = simu->min();
+  auto max = simu->max();
+
+  if (prank == 0)
+    std::cout << "MPI:   " << psize << " (" << simu->dims(0) << ", " << simu->dims(1) << ")\n"
+              << "OMP:   " << nthreads << "\n"
+              << "Grid:  " << N << "x" << N << "\n"
+              << "Bound: " << "[ " << min << " : " << max << " ]" << "\n"
+              << "Iter:  " << k << "\n"
+              << "Norm:  " << std::scientific << l2 << "\n"
+              << "Time:  " << time.count()  << "s" << "\n"
+              << "Flops: " << simu->complexity()*k/time.count()/1e9  << " GFlops" << std::endl;
+
+  simu.release(); // To force destruction of MPI related stuff
+  MPI_Finalize();
+
+  return 0;
+}
diff --git a/poisson/.solutions-cartesian-alltoall/poisson.sub b/poisson/.solutions-cartesian-alltoall/poisson.sub
new file mode 100644
index 0000000..aa43198
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/poisson.sub
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+##SBATCH --reservation=phpc2017
+##SBATCH --account=phpc2017
+#SBATCH -N 1
+#SBATCH -n 1
+
+module load gcc
+
+rm -f complexity.dat
+
+for i in 128 256 512 1024
+do
+    ./poisson $i >> complexity.dat
+done
+
+slmodules -r deprecated
+module load gnuplot
+
+gnuplot <<EOF
+set terminal svg
+set output 'complexity.svg'
+set multiplot layout 2,1
+set key off
+set xlabel "N [-]"
+set ylabel "iter [-]"
+plot "complexity.dat" u 1:2 w lp
+
+set ylabel "time [s]"
+plot "complexity.dat" u 1:2 w lp
+EOF
diff --git a/poisson/.solutions-cartesian-alltoall/simulation.cc b/poisson/.solutions-cartesian-alltoall/simulation.cc
new file mode 100644
index 0000000..49a01d1
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/simulation.cc
@@ -0,0 +1,269 @@
+/* -------------------------------------------------------------------------- */
+#include "simulation.hh"
+/* -------------------------------------------------------------------------- */
+#include <assert.h>
+#include <cmath>
+#include <iostream>
+#include <numeric>
+#include <mpi.h>
+
+/* -------------------------------------------------------------------------- */
+Simulation::Simulation(int m, int n, MPI_Comm communicator) {
+
+  // retrieving the number of proc and the rank in the proc pool
+  MPI_Comm_size(communicator, &m_psize);
+
+  m_dims = {0, 0};
+  std::array<int, 2> periods{0, 0};
+  MPI_Dims_create(m_psize, m_dims.size(), m_dims.data());
+
+  assert(m_dims[0] * m_dims[1] == m_psize && "MPI_Dims_create is fucked up");
+
+  MPI_Cart_create(communicator, m_dims.size(), m_dims.data(), periods.data(), 0,
+                  &m_communicator);
+
+  MPI_Comm_rank(m_communicator, &m_prank);
+
+  MPI_Cart_coords(m_communicator, m_prank, m_coords.size(), m_coords.data());
+
+  m_dumper = std::make_unique<DumperBinary>(m_communicator);
+
+  m_global_size = {m, n};
+
+  std::array<int, 2> ghosts{0, 0};
+
+  for (std::size_t i = 0; i < m_local_size.size(); ++i) {
+    // computation of the local size of the grid the remainder is spread equally
+    // on the first processors
+    m_local_size[i] = m_global_size[i] / m_dims[i] +
+                      (m_coords[i] < m_global_size[i] % m_dims[i] ? 1 : 0);
+
+    // adding the ghosts lines if needed
+    if (m_dims[i] > 1)
+      ghosts[i] = (m_coords[i] == 0 || m_coords[i] == m_dims[i] - 1) ? 1 : 2;
+
+    m_local_size[i] += ghosts[i];
+
+    // computing the offsets of the local grid in the global one
+    m_offset[i] = (m_global_size[i] / m_dims[i]) * m_coords[i] +
+                  (m_coords[i] < m_global_size[i] % m_dims[i]
+                       ? m_coords[i]
+                       : m_global_size[i] % m_dims[i]);
+
+    m_h[i] = 1. / m_global_size[i];
+  }
+
+#if defined(_DEBUG_MPI)
+  for (int i = 0; i < m_psize; ++i) {
+    if (m_prank == i)
+      std::cerr << m_prank << " (" << m_coords[0] << ", " << m_coords[1] << ")"
+                << " - " << m_local_size[0] << "x" << m_local_size[1] << " + "
+                << m_offset[0] << "x" << m_offset[1] << " ++ [" << ghosts[0]
+                << "x" << ghosts[1] << "]\n";
+    MPI_Barrier(m_communicator);
+  }
+#endif
+
+  // resizing the different grids
+  m_grids.resize(m_local_size[0], m_local_size[1]);
+  m_f.resize(m_local_size[0], m_local_size[1]);
+
+  MPI_Type_vector(1, m_local_size[1] - ghosts[1], 1, MPI_FLOAT, &m_line_t);
+  MPI_Type_vector(m_local_size[0] - ghosts[0], 1, m_local_size[1], MPI_FLOAT, &m_column_t);
+
+  MPI_Type_commit(&m_column_t);
+  MPI_Type_commit(&m_line_t);
+
+  auto & uo = m_grids.old();
+
+  auto displ = [&](auto && d) {
+    MPI_Aint addr0, addr1;
+    MPI_Get_address(&uo(0, 0), &addr0);
+    MPI_Get_address(&d, &addr1);
+    return MPI_Aint_diff(addr1, addr0);
+  };
+
+  m_sdispls = {
+    displ(uo(1, 1)),
+    displ(uo(m_local_size[0] - 2, 1)),
+    displ(uo(1, 1)),
+    displ(uo(1, m_local_size[1] - 2))
+  };
+
+  m_rdispls = {
+    displ(uo(0, 1)),
+    displ(uo(m_local_size[0] - 1, 1)),
+    displ(uo(1, 0)),
+    displ(uo(1, m_local_size[1] - 1)),
+  };
+
+  m_types = {m_line_t, m_line_t, m_column_t, m_column_t};
+  m_counts = {1, 1, 1, 1};
+}
+
+/* -------------------------------------------------------------------------- */
+void Simulation::set_initial_conditions() {
+  std::array<int, 2> start = {0, 0};
+  std::array<int, 2> end = m_local_size;
+
+  for (int i = 0; i < 2; ++i) {
+    start[i] += (m_coords[i] == 0 ? 0 : 1);
+    end[i] -= (m_coords[i] == m_dims[i] - 1 ? 0 : 1);
+  }
+
+  float min = 0., max = 0.;
+  float a = 5.5 * M_PI;
+  for (int i = start[0]; i < end[0]; i++) {
+    for (int j = start[1]; j < end[1]; j++) {
+      m_f(i, j) =
+          -2. * a * a * std::sin(a * (m_offset[0] + i - start[0]) * m_h[0]) *
+          std::sin(a * (m_offset[1] + j - start[1]) * m_h[1]) * m_h[0] * m_h[1];
+
+      min = std::min(min, m_f(i, j));
+      max = std::max(max, m_f(i, j));
+    }
+  }
+
+  m_dumper->set_min(min);
+  m_dumper->set_max(max);
+  m_dumper->dump(m_f, 1);
+}
+
+/* -------------------------------------------------------------------------- */
+std::tuple<float, int> Simulation::compute() {
+  int s = 0;
+
+  // std::cout << m_h_m << " " << m_h_n << " " << m_epsilon << std::endl;
+  float l2 = 0.;
+
+  do {
+    l2 = compute_step();
+    DEBUG::print(std::to_string(m_prank) + "do_step: " + std::to_string(l2));
+
+    m_grids.swap();
+
+    MPI_Allreduce(MPI_IN_PLACE, &l2, 1, MPI_FLOAT, MPI_SUM, m_communicator);
+    DEBUG::print(std::to_string(m_prank) + " MPI_Allreduce: " + std::to_string(l2) + " / " + std::to_string(m_epsilon));
+    ++s;
+
+  } while (l2 > m_epsilon);
+
+  m_dumper->set_min(min());
+  m_dumper->set_max(max());
+  m_dumper->dump(m_grids.old(), 0);
+
+  return std::make_tuple(std::sqrt(l2), s);
+}
+
+/* -------------------------------------------------------------------------- */
+void Simulation::set_epsilon(float epsilon) { m_epsilon = epsilon * epsilon; }
+
+/* -------------------------------------------------------------------------- */
+float Simulation::epsilon() const { return std::sqrt(m_epsilon); }
+
+/* -------------------------------------------------------------------------- */
+inline float Simulation::compute_row(int i) {
+  float l2 = 0.;
+
+  Grid & u = m_grids.current();
+  Grid & uo = m_grids.old();
+
+  for (int j = 1; j < m_local_size[1] - 1; j++) {
+    // computation of the new step
+    u(i, j) = (uo(i - 1, j) + uo(i + 1, j) + uo(i, j - 1) + uo(i, j + 1) -
+               m_f(i, j)) /
+              4.;
+
+    // L2 norm
+    l2 += (uo(i, j) - u(i, j)) * (uo(i, j) - u(i, j));
+  }
+
+  return l2;
+}
+
+/* -------------------------------------------------------------------------- */
+float Simulation::compute_step() {
+  float l2 = 0.;
+
+  auto & uo = m_grids.old();
+  auto & u = m_grids.current();
+
+  auto _compute = [&](auto && i, auto && j) {
+    u(i, j) = (uo(i - 1, j) + uo(i + 1, j) + uo(i, j - 1) + uo(i, j + 1) -
+               m_f(i, j)) /
+              4.;
+
+    l2 += (uo(i, j) - u(i, j)) * (uo(i, j) - u(i, j));
+  };
+
+  MPI_Neighbor_alltoallw(
+    &uo(0, 0), m_counts.data(), m_sdispls.data(), m_types.data(),
+    &uo(0, 0), m_counts.data(), m_rdispls.data(), m_types.data(),
+    m_communicator);
+
+  // computing the inner rows that do not depend on the ghosts
+  for (int i = 2; i < m_local_size[0] - 2; i++) {
+    for (int j = 2; j < m_local_size[1] - 2; j++) {
+      _compute(i, j);
+    }
+  }
+
+  for (int i = 2; i < m_local_size[1] - 2; i++) {
+    _compute(1, i);
+  }
+
+  for (int i = 2; i < m_local_size[1] - 2; i++) {
+    _compute(m_local_size[0] - 2, i);
+  }
+
+  for (int i = 2; i < m_local_size[0] - 2; i++) {
+    _compute(i, 1);
+  }
+
+  for (int i = 2; i < m_local_size[0] - 2; i++) {
+    _compute(i, m_local_size[1] - 2);
+  }
+
+  return l2;
+}
+
+namespace {
+template <typename G, typename Func>
+auto accumulate_grid(const G & grid, float init, Func && func) {
+  auto m = grid.m();
+  auto n = grid.n();
+
+  for (decltype(m) i = 1; i < m - 1; i++) {
+    for (decltype(n) j = 1; j < n - 1; j++) {
+      init = func(init, grid(i, j));
+    }
+  }
+  return init;
+}
+} // namespace
+
+/* -------------------------------------------------------------------------- */
+float Simulation::min() const {
+  const auto & u0 = m_grids.old();
+
+  auto tmp = accumulate_grid(
+      u0, u0(1, 1), [](auto && a, auto && b) { return std::min(a, b); });
+
+  MPI_Allreduce(MPI_IN_PLACE, &tmp, 1, MPI_FLOAT, MPI_MIN, m_communicator);
+
+  return tmp;
+}
+
+/* -------------------------------------------------------------------------- */
+float Simulation::max() const {
+  const auto & u0 = m_grids.old();
+
+  auto tmp = accumulate_grid(
+      u0, u0(1, 1), [](auto && a, auto && b) { return std::max(a, b); });
+
+  MPI_Allreduce(MPI_IN_PLACE, &tmp, 1, MPI_FLOAT, MPI_MAX, m_communicator);
+
+  return tmp;
+}
+
+/* -------------------------------------------------------------------------- */
diff --git a/poisson/.solutions-cartesian-alltoall/simulation.hh b/poisson/.solutions-cartesian-alltoall/simulation.hh
new file mode 100644
index 0000000..2baf0c7
--- /dev/null
+++ b/poisson/.solutions-cartesian-alltoall/simulation.hh
@@ -0,0 +1,91 @@
+#ifndef SIMULATION_HH
+#define SIMULATION_HH
+
+/* -------------------------------------------------------------------------- */
+#include "double_buffer.hh"
+#include "dumpers.hh"
+#include "grid.hh"
+/* -------------------------------------------------------------------------- */
+#include <array>
+#include <memory>
+#include <tuple>
+#include <mpi.h>
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+class Simulation {
+public:
+  Simulation(int m, int n, MPI_Comm communicator);
+
+  /// set the initial conditions, Dirichlet and source term
+  virtual void set_initial_conditions();
+
+  /// perform the simulation
+  std::tuple<float, int> compute();
+
+  /// access the precision
+  void set_epsilon(float epsilon);
+  float epsilon() const;
+
+  std::size_t complexity() { return 8 * m_global_size[0] * m_global_size[1]; }
+
+  float min() const;
+  float max() const;
+
+  float dims(std::size_t i) const { return m_dims[i]; }
+
+protected:
+  /// compute one step and return an error
+  virtual float compute_step();
+
+  /// compute one row
+  float compute_row(int i);
+
+private:
+  /// Global problem size
+  std::array<int, 2> m_global_size;
+
+  /// Local problem size
+  std::array<int, 2> m_local_size;
+
+  /// local offset
+  std::array<int, 2> m_offset;
+
+  /// Cartesian dimensions
+  std::array<int, 2> m_dims;
+
+  /// Cartesian coordinates
+  std::array<int, 2> m_coords;
+
+  /// proc rank
+  int m_prank{0};
+
+  /// communicator size
+  int m_psize{1};
+
+  /// Precision to achieve
+  float m_epsilon;
+
+  /// grid spacing
+  std::array<float, 2> m_h;
+
+  /// Grids storage
+  DoubleBuffer m_grids;
+
+  /// source term
+  Grid m_f;
+
+  /// Dumper to use for outputs
+  std::unique_ptr<Dumper> m_dumper;
+
+  /// communicator
+  MPI_Comm m_communicator;
+
+  std::array<MPI_Aint, 4> m_sdispls, m_rdispls;
+  std::array<MPI_Datatype, 4> m_types;
+  std::array<int, 4> m_counts;
+
+  MPI_Datatype m_line_t, m_column_t;
+};
+
+#endif /* SIMULATION_HH */