diff --git a/poisson_wip/.poisson_WeakScaling.cc.swo b/poisson_wip/.poisson_WeakScaling.cc.swo
new file mode 100644
index 0000000..9080b5a
Binary files /dev/null and b/poisson_wip/.poisson_WeakScaling.cc.swo differ
diff --git a/poisson_wip/Makefile b/poisson_wip/Makefile
index d44fcad..148bdde 100644
--- a/poisson_wip/Makefile
+++ b/poisson_wip/Makefile
@@ -1,23 +1,23 @@
-#OPTIM+=-O3 -march=native
-DEBUG+=-g -O1
+OPTIM+=-O3 -march=native
+#DEBUG+=-g -O0
 
 CXX=mpicxx
 CC=mpicxx
 
 LD=${CXX}
-CXXFLAGS+= $(DEBUG) -Wall -Wextra -Werror -pedantic -std=c++11
+CXXFLAGS+= $(OPTIM) $(DEBUG) -Wall -Wextra -Werror -pedantic -std=c++11
 
 # Uncomment the line below is you use intel compiler
 # CXXFLAGS+=-no-multibyte-chars
 
 LDFLAGS+= $(DEBUG) -lm $(CXXFLAGS)
 
 OBJS=poisson.o simulation.o double_buffer.o grid.o dumpers.o
 
 all: poisson
 
 poisson: $(OBJS)
 	$(LD) -o $@ $(OBJS) $(LDFLAGS)
 
 clean:
 	rm -f hello poisson *.o *~
diff --git a/poisson_wip/RES_STRONG_SCALING.out b/poisson_wip/RES_STRONG_SCALING.out
new file mode 100644
index 0000000..e343441
--- /dev/null
+++ b/poisson_wip/RES_STRONG_SCALING.out
@@ -0,0 +1,28 @@
+4 512 50 1.953665e-01 4.910314e-03
+4 1024 50 1.566734e-01 1.707340e-02
+4 2048 50 5.252314e-02 7.919623e-02
+4 4096 50 1.409257e-02 3.338150e-01
+1 512 50 1.925999e-01 1.674954e-02
+1 1024 50 5.535238e-02 6.725341e-02
+1 2048 50 1.428576e-02 2.927208e-01
+1 4096 50 3.525313e-03 1.222618e+00
+2 512 50 2.916186e-01 8.708020e-03
+2 1024 50 1.032499e-01 3.449003e-02
+2 2048 50 2.813629e-02 1.498244e-01
+2 4096 50 7.128438e-03 6.634533e-01
+8 512 50 5.083296e-03 2.454132e-03
+8 1024 50 1.048306e-01 9.116482e-03
+8 2048 50 7.969433e-02 4.739401e-02
+8 4096 50 2.634776e-02 2.324006e-01
+16 1024 50 2.736356e-03 5.234973e-03
+16 512 50 9.928941e-06 1.540936e-03
+16 2048 50 5.334156e-02 3.863307e-02
+16 4096 50 4.000773e-02 1.915168e-01
+32 2048 50 1.393561e-03 5.472149e-02
+32 512 50 4.368700e-05 4.394457e-02
+32 1024 50 5.323665e-06 4.932709e-02
+32 4096 50 2.678525e-02 1.218722e-01
+56 512 50 1.972880e-03 3.652409e-02
+56 1024 50 2.876078e-05 4.078154e-02
+56 2048 50 1.548957e-06 5.334825e-02
+56 4096 50 2.334388e-03 8.848817e-02
diff --git a/poisson_wip/RES_WEAK_SCALING.out b/poisson_wip/RES_WEAK_SCALING.out
new file mode 100644
index 0000000..1c27947
--- /dev/null
+++ b/poisson_wip/RES_WEAK_SCALING.out
@@ -0,0 +1,28 @@
+1 4096 50 3.526251e-03 1.220624e+00
+1 4096 50 3.526251e-03 1.220525e+00
+1 4096 50 3.526251e-03 1.220003e+00
+1 4096 50 3.526251e-03 1.215247e+00
+2 5792 50 3.524356e-03 1.240361e+00
+2 5792 50 3.524356e-03 1.242234e+00
+2 5792 50 3.524356e-03 1.241933e+00
+2 5792 50 3.524356e-03 1.245746e+00
+4 8192 50 3.514089e-03 1.378891e+00
+4 8192 50 3.514089e-03 1.383240e+00
+4 8192 50 3.514089e-03 1.371760e+00
+4 8192 50 3.514089e-03 1.384499e+00
+8 11585 50 3.493383e-03 1.844967e+00
+8 11585 50 3.493383e-03 1.860703e+00
+8 11585 50 3.493383e-03 1.849053e+00
+8 11585 50 3.493383e-03 1.896977e+00
+16 16384 50 3.449781e-03 3.095787e+00
+16 16384 50 3.449781e-03 3.093048e+00
+16 16384 50 3.449781e-03 3.095237e+00
+16 16384 50 3.449781e-03 3.101045e+00
+32 23170 50 inf 3.062012e+00
+32 23170 50 inf 3.061116e+00
+32 23170 50 3.366011e-03 3.084201e+00
+32 23170 50 3.366011e-03 3.065258e+00
+56 30651 50 3.250067e-03 3.078269e+00
+56 30651 50 3.250067e-03 3.084067e+00
+56 30651 50 3.250067e-03 3.070983e+00
+56 30651 50 3.250067e-03 3.072164e+00
diff --git a/poisson_wip/abitlesssmartloop.sh b/poisson_wip/abitlesssmartloop.sh
new file mode 100644
index 0000000..ea752c3
--- /dev/null
+++ b/poisson_wip/abitlesssmartloop.sh
@@ -0,0 +1,30 @@
+#!/bin/bash -l
+#SBATCH -n 8
+# ^ sbatch only honours directives placed before the first command.
+
+# Clean terminal and pwd (-f: no error when there is nothing to delete)
+clear
+rm -f slurm-*
+
+# Compilation; stop if the build fails so no stale binary is benchmarked
+echo Compiling...
+make clean
+make || exit 1
+
+# Prepare collection file
+echo Preparing collection file...
+: > out.txt
+
+# Run programs
+echo Running...
+echo Running with 8 processes
+
+for ppdir in 512 1024 2048 4096
+do
+	echo "$ppdir points"
+	# -n 8 is repeated here so the count also holds outside sbatch
+	srun -n 8 poisson "$ppdir" 50 >> out.txt
+done
+
+echo heres the results:
+cat out.txt
diff --git a/poisson_wip/fast.sh b/poisson_wip/fast.sh
new file mode 100644
index 0000000..71f69fd
--- /dev/null
+++ b/poisson_wip/fast.sh
@@ -0,0 +1,5 @@
+#! /bin/bash -l
+# Queue one Slurm job per grid size; each scN.sh appends to interesting.out.
+for size in 512 1024 2048 4096; do
+	sbatch "sc${size}.sh"
+done
diff --git a/poisson_wip/output.txt b/poisson_wip/output.txt
new file mode 100644
index 0000000..e69de29
diff --git a/poisson_wip/outs.txt b/poisson_wip/outs.txt
new file mode 100644
index 0000000..e69de29
diff --git a/poisson_wip/poisson_SS.cc b/poisson_wip/poisson_SS.cc
new file mode 100644
index 0000000..b9ab20e
--- /dev/null
+++ b/poisson_wip/poisson_SS.cc
@@ -0,0 +1,69 @@
+/* -------------------------------------------------------------------------- */
+#include "simulation.hh"
+/* -------------------------------------------------------------------------- */
+#include <chrono>
+#include <iostream>
+#include <sstream>
+#include <tuple>
+#include <chrono>
+/* -------------------------------------------------------------------------- */
+#include <mpi.h>
+/* -------------------------------------------------------------------------- */
+
+typedef std::chrono::high_resolution_clock clk;
+typedef std::chrono::duration<double> second;
+static void usage(const std::string & prog_name) {  // usage error: report it and quit
+  std::cerr << prog_name << " <grid_size> <n_iter>" << std::endl;
+  MPI_Finalize();  // reached by every rank after MPI_Init, so MPI shuts down cleanly
+  exit(1);         // non-zero: a usage error must not look like a successful run
+}
+
+// Strong-scaling driver: slices the N rows of the grid across the MPI ranks,
+// runs the solver and prints "<psize> <N> <iterations> <l2> <seconds>" on rank 0.
+int main(int argc, char * argv[]) {
+  MPI_Init(&argc, &argv);
+  int prank, psize;
+  MPI_Comm_rank(MPI_COMM_WORLD, &prank);
+  MPI_Comm_size(MPI_COMM_WORLD, &psize);
+
+  if (argc != 3) usage(argv[0]);
+
+  std::stringstream arg_0(argv[1]), arg_1(argv[2]);
+  int N;
+  int n_iter;
+  arg_0 >> N;
+  arg_1 >> n_iter;
+
+  // Reject unparsable input and grids with fewer rows than ranks.
+  if(arg_0.fail() || arg_1.fail() || N < psize) usage(argv[0]);
+
+  // Compute slice parameters: each rank owns N/psize rows (the last rank also
+  // takes the remainder) plus one halo row per neighbouring rank. A single
+  // rank has no neighbour and therefore gets exactly N rows.
+  const int rows_per_rank = N / psize;
+  const int n_halo = (prank > 0 ? 1 : 0) + (prank < psize - 1 ? 1 : 0);
+  int n_loc = rows_per_rank + n_halo;
+  if (prank == psize - 1) n_loc += N - psize * rows_per_rank;
+
+  Simulation simu(n_loc, N, n_iter, prank, psize);
+
+  simu.set_initial_conditions();
+
+  float l2;
+  int k;
+
+  // Only the solve itself is timed.
+  auto start = clk::now();
+  std::tie(l2, k) = simu.compute();
+  auto end = clk::now();
+
+  second time = end - start;
+
+  if(prank == 0)
+    std::cout << psize << " " << N << " "
+              << k << " " << std::scientific << l2 << " "
+              << time.count() << std::endl;
+
+  MPI_Finalize();
+  return 0;
+}
diff --git a/poisson_wip/poisson_WS.cc b/poisson_wip/poisson_WS.cc
new file mode 100644
index 0000000..723a66d
--- /dev/null
+++ b/poisson_wip/poisson_WS.cc
@@ -0,0 +1,64 @@
+/* -------------------------------------------------------------------------- */
+#include "simulation.hh"
+/* -------------------------------------------------------------------------- */
+#include <chrono>
+#include <iostream>
+#include <sstream>
+#include <tuple>
+#include <chrono>
+#include <cmath>
+/* -------------------------------------------------------------------------- */
+#include <mpi.h>
+/* -------------------------------------------------------------------------- */
+
+typedef std::chrono::high_resolution_clock clk;
+typedef std::chrono::duration<double> second;
+static void usage(const std::string & prog_name) {  // usage error: report it and quit
+  std::cerr << prog_name << " <grid_size> <n_iter>" << std::endl;
+  MPI_Finalize();  // reached by every rank after MPI_Init, so MPI shuts down cleanly
+  exit(1);         // non-zero: a usage error must not look like a successful run
+}
+
+int main(int argc, char * argv[]) {
+  MPI_Init(&argc, &argv);
+
+  int prank = 0;
+  int psize = 0;
+  MPI_Comm_rank(MPI_COMM_WORLD, &prank);
+  MPI_Comm_size(MPI_COMM_WORLD, &psize);
+
+  if (argc != 3) usage(argv[0]);
+
+  // Both arguments have to parse as integers, even though the grid size read
+  // here is replaced below by a value derived from the number of ranks.
+  std::stringstream size_arg(argv[1]);
+  std::stringstream iter_arg(argv[2]);
+  int N;
+  int n_iter;
+  size_arg >> N;
+  iter_arg >> n_iter;
+  if (size_arg.fail() || iter_arg.fail()) usage(argv[0]);
+
+  // Weak-scaling sizes: n_loc * N stays close to 4096 * 4096 on every rank.
+  const double root_p = std::sqrt(psize);
+  const int n_loc = static_cast<int>(4096 / root_p);
+  N = static_cast<int>(4096 * root_p);
+
+  Simulation simu(n_loc, N, n_iter, prank, psize);
+  simu.set_initial_conditions();
+
+  // Time the solve only.
+  const clk::time_point t_begin = clk::now();
+  const std::tuple<float, int> result = simu.compute();
+  const clk::time_point t_end = clk::now();
+  const second elapsed = t_end - t_begin;
+
+  if (prank == 0) {
+    std::cout << psize << " " << N << " " << std::get<1>(result) << " "
+              << std::scientific << std::get<0>(result) << " "
+              << elapsed.count() << std::endl;
+  }
+
+  MPI_Finalize();
+  return 0;
+}
diff --git a/poisson_wip/sc1024.sh b/poisson_wip/sc1024.sh
new file mode 100644
index 0000000..5c17746
--- /dev/null
+++ b/poisson_wip/sc1024.sh
@@ -0,0 +1,4 @@
+#!/bin/bash -l
+#SBATCH -n 56
+# Run poisson with grid size 1024 for 50 iterations; append its output to interesting.out.
+srun poisson 1024 50 >> interesting.out
diff --git a/poisson_wip/sc2048.sh b/poisson_wip/sc2048.sh
new file mode 100644
index 0000000..74e4e7a
--- /dev/null
+++ b/poisson_wip/sc2048.sh
@@ -0,0 +1,4 @@
+#!/bin/bash -l
+#SBATCH -n 56
+# Run poisson with grid size 2048 for 50 iterations; append its output to interesting.out.
+srun poisson 2048 50 >> interesting.out
diff --git a/poisson_wip/sc4096.sh b/poisson_wip/sc4096.sh
new file mode 100644
index 0000000..a7f4e86
--- /dev/null
+++ b/poisson_wip/sc4096.sh
@@ -0,0 +1,4 @@
+#!/bin/bash -l
+#SBATCH -n 56
+# Run poisson with grid size 4096 for 50 iterations; append its output to interesting.out.
+srun poisson 4096 50 >> interesting.out
diff --git a/poisson_wip/sc512.sh b/poisson_wip/sc512.sh
new file mode 100644
index 0000000..1c4d2d3
--- /dev/null
+++ b/poisson_wip/sc512.sh
@@ -0,0 +1,4 @@
+#!/bin/bash -l
+#SBATCH -n 56
+# Run poisson with grid size 512 for 50 iterations; append its output to interesting.out.
+srun poisson 512 50 >> interesting.out
diff --git a/poisson_wip/script.sh b/poisson_wip/script.sh
index 968b4ee..8185969 100644
--- a/poisson_wip/script.sh
+++ b/poisson_wip/script.sh
@@ -1,14 +1,4 @@
 #!/bin/bash -l
-##SBATCH --reservation=Course-math-454
-## Change to Course-math-454-week after class
-### SBATCH does not like the reservation parameter
-##SBATCH --account=math-454
-#SBATCH -n 4
+#SBATCH -n 16
  
-# Compilation
-make clean
-module purge
-module load gcc mvapich2
-make
-
-srun poisson 10 10
+srun poisson 4096 50
diff --git a/poisson_wip/simulation.cc b/poisson_wip/simulation.cc
index ea19856..ed57efb 100644
--- a/poisson_wip/simulation.cc
+++ b/poisson_wip/simulation.cc
@@ -1,104 +1,125 @@
 /* -------------------------------------------------------------------------- */
 #include "simulation.hh"
 /* -------------------------------------------------------------------------- */
 #include <cmath>
 #include <iostream>
 #include <mpi.h>
+#include <vector>
 /* -------------------------------------------------------------------------- */
 
 /* -------------------------------------------------------------------------- */
 Simulation::Simulation(int m, int n, int n_iter, int prank, int psize)
     : m_global_m(m), m_global_n(n), m_n_iter(n_iter), m_h_m(1. / m),
       m_h_n(1. / n), m_grids(m, n), m_f(m, n),
       m_dumper(new DumperASCII(m_grids.old())), m_prank(prank), m_psize(psize)  {}
 
 /* -------------------------------------------------------------------------- */
 void Simulation::set_initial_conditions() {
+  // Rows: global index times m_h_n (square domain); m_h_m is 1/(rows of this slice).
   for (int i = 0; i < m_global_m; i++) {
     for (int j = 0; j < m_global_n; j++) {
-      m_f(i, j) = -2. * 100. * M_PI * M_PI * std::sin(10. * M_PI * i * ((int)(m_prank*m_global_n/m_psize)) * m_h_m) *
+      m_f(i, j) = -2. * 100. * M_PI * M_PI * std::sin(10. * M_PI * ( i + (int)(m_prank*m_global_n/m_psize)) * m_h_n) *
                   std::sin(10. * M_PI * j * m_h_n);
     } // assuming square global problem size
   }
 }
 
 /* -------------------------------------------------------------------------- */
 std::tuple<float, int> Simulation::compute() {
   int s = 0;
   float l2 = 0;
   int next = m_prank+1;
   int prev = m_prank-1;
 
   do {
     /// Compute step
     l2 = compute_step();
-    m_grids.swap();
-    std::cout << "Process " << m_prank << " just computed iteration " << s << std::endl << std::flush;
 
-    /// Launch communications
-    Grid & uo = m_grids.old();
-
-/* ATTEMPTED ASYNCHRONOUS VERSION
-    //Send, recieve relevant data to/from previous process (unless you're the first process)
-    if (m_prank>0){
-	MPI_Isend(&u(0, 0), m_global_n, MPI_DOUBLE, prev, 13, MPI_COMM_WORLD, &request_ns);
-	MPI_Recv(&u(1, 0), m_global_n, MPI_DOUBLE, prev, 13,  MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-	}
-
-    //Send, recieve data to/from next process (unless you're the last process)
-    if (m_prank < m_psize){
-    if (m_prank != 0 && m_prank < m_psize-1){
-	MPI_Isend(&u(m_global_m-1, 0), m_global_n, MPI_DOUBLE, next, 13, MPI_COMM_WORLD, &request_ps);
-	MPI_Recv(&u(m_global_m, 0), m_global_n, MPI_DOUBLE, next, 13, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-	}
+    Grid & u = m_grids.current();
+
+    // Prepare buffers: the halo rows are staged in contiguous float vectors
+    // (*_a is exchanged with prev, *_b with next); one request per transfer.
+    std::vector<float> send_a(m_global_n), recv_a(m_global_n);
+    std::vector<float> send_b(m_global_n), recv_b(m_global_n);
+    MPI_Request requests[4] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL,
+                               MPI_REQUEST_NULL, MPI_REQUEST_NULL};
     
-    /// Wait until data has been sent/recieved (This can be optimized)
-    MPI_Wait(&request_ns, MPI_STATUS_IGNORE);
-    MPI_Wait(&request_ps, MPI_STATUS_IGNORE);
-*/
-
-    /* ATTEMPTED SYNCHRONOUS VERSION */
-    if (m_prank > 0){
-	std::cout << "Process " << m_prank << " about to exchange information with his predecessor " << prev << std::endl << std::flush;
-        MPI_Sendrecv(&uo(0, 0), m_global_n, MPI_DOUBLE, prev, 13,
-		&uo(1, 0), m_global_n, MPI_DOUBLE, prev, 13,
-		MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-        std::cout << "Process " << m_prank << " has recieved data from predecessor " << prev << std::endl << std::flush;
-	}
-  
-    if (m_prank < m_psize-1){
-        std::cout << "Process " << m_prank << " about to exchange information with successor " << next << std::endl << std::flush;
-	MPI_Sendrecv(&uo(m_global_m-1, 0), m_global_n, MPI_DOUBLE, next, 13,
-		&uo(m_global_m, 0), m_global_n, MPI_DOUBLE, next, 13,
-		MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-	std::cout << "Process " << m_prank << " has recieved data from successor " << next << std::endl << std::flush;
-	}
+    for (int j = 0; j < m_global_n; j++) {
+      send_a[j] = u(1, j);
+      send_b[j] = u(m_global_m - 2, j);
+    }
+
+    /// Launch communications
+    /* Strategy: post every receive and send as a non-blocking operation, each
+     * with a request handle of its own, then wait for all of them. Reusing one
+     * handle for an Isend/Irecv pair would overwrite the send request, so the
+     * send could never be waited on and its buffer might be released while
+     * still in flight. Since nothing blocks until the wait, no even/odd
+     * ordering of the ranks is required to avoid a deadlock. */
+
+    // Exchange information with block above (unless you're the first block)
+    if (m_prank > 0) {
+      MPI_Irecv(recv_a.data(), m_global_n, MPI_FLOAT, prev, 13, MPI_COMM_WORLD,
+                &requests[0]);
+      MPI_Isend(send_a.data(), m_global_n, MPI_FLOAT, prev, 13, MPI_COMM_WORLD,
+                &requests[1]);
+    }
+
+    // Exchange information with block below (unless you're the last block)
+    if (m_prank < m_psize - 1) {
+      MPI_Irecv(recv_b.data(), m_global_n, MPI_FLOAT, next, 13, MPI_COMM_WORLD,
+                &requests[2]);
+      MPI_Isend(send_b.data(), m_global_n, MPI_FLOAT, next, 13, MPI_COMM_WORLD,
+                &requests[3]);
+    }
+
+    // Requests left at MPI_REQUEST_NULL (no neighbour on that side) are
+    // ignored by MPI_Waitall.
+    MPI_Waitall(4, requests, MPI_STATUSES_IGNORE);
+
+    // Accept data
+    if (m_prank > 0) {
+      for (int j = 0; j < m_global_n; j++) {
+        u(0, j) = recv_a[j];
+      }
+    }
+    if (m_prank < m_psize - 1) {
+      for (int j = 0; j < m_global_n; j++) {
+        u(m_global_m - 1, j) = recv_b[j];
+      }
+    }
+
+    // All transfers are complete, so the staging vectors can be destroyed at
+    // the end of this iteration without an explicit delete[].
+    // NOTE(review): l2 is the local residual only; no reduction across ranks.
 
+    // Swap the double buffer (presumably current() and old() trade places)
+    // before the next iteration.
+
+    m_grids.swap();
     ++s;
   } while (s < m_n_iter);
 
-//  m_dumper->dump(s);
-
   return std::make_tuple(l2, s);
 }
 
 /* -------------------------------------------------------------------------- */
 float Simulation::compute_step() {
   float l2 = 0.;
 
   Grid & u = m_grids.current();
   Grid & uo = m_grids.old();
 
   for (int i = 1; i < m_global_m - 1; i++) {
     for (int j = 1; j < m_global_n - 1; j++) {
       // computation of the new step
       u(i, j) = 0.25 * (uo(i - 1, j) + uo(i + 1, j) + uo(i, j - 1) +
                         uo(i, j + 1) - m_f(i, j) * m_h_m * m_h_n);
 
       // L2 norm
       l2 += (uo(i, j) - u(i, j)) * (uo(i, j) - u(i, j));
     }
   }
 
   return l2;
 }
diff --git a/poisson_wip/simulation.cc b/poisson_wip/simulation_sendrecv.cc
similarity index 52%
copy from poisson_wip/simulation.cc
copy to poisson_wip/simulation_sendrecv.cc
index ea19856..b1a3811 100644
--- a/poisson_wip/simulation.cc
+++ b/poisson_wip/simulation_sendrecv.cc
@@ -1,104 +1,131 @@
 /* -------------------------------------------------------------------------- */
 #include "simulation.hh"
 /* -------------------------------------------------------------------------- */
 #include <cmath>
 #include <iostream>
 #include <mpi.h>
+#include <vector>
 /* -------------------------------------------------------------------------- */
 
 /* -------------------------------------------------------------------------- */
 Simulation::Simulation(int m, int n, int n_iter, int prank, int psize)
     : m_global_m(m), m_global_n(n), m_n_iter(n_iter), m_h_m(1. / m),
       m_h_n(1. / n), m_grids(m, n), m_f(m, n),
       m_dumper(new DumperASCII(m_grids.old())), m_prank(prank), m_psize(psize)  {}
 
 /* -------------------------------------------------------------------------- */
 void Simulation::set_initial_conditions() {
+  // Rows: global index times m_h_n (square domain); m_h_m is 1/(rows of this slice).
   for (int i = 0; i < m_global_m; i++) {
     for (int j = 0; j < m_global_n; j++) {
-      m_f(i, j) = -2. * 100. * M_PI * M_PI * std::sin(10. * M_PI * i * ((int)(m_prank*m_global_n/m_psize)) * m_h_m) *
+      m_f(i, j) = -2. * 100. * M_PI * M_PI * std::sin(10. * M_PI * ( i + (int)(m_prank*m_global_n/m_psize)) * m_h_n) *
                   std::sin(10. * M_PI * j * m_h_n);
     } // assuming square global problem size
   }
 }
 
 /* -------------------------------------------------------------------------- */
 std::tuple<float, int> Simulation::compute() {
   int s = 0;
   float l2 = 0;
   int next = m_prank+1;
   int prev = m_prank-1;
 
   do {
     /// Compute step
     l2 = compute_step();
-    m_grids.swap();
-    std::cout << "Process " << m_prank << " just computed iteration " << s << std::endl << std::flush;
+    // (no per-iteration progress message: stdout carries the benchmark result)
 
     /// Launch communications
-    Grid & uo = m_grids.old();
-
-/* ATTEMPTED ASYNCHRONOUS VERSION
-    //Send, recieve relevant data to/from previous process (unless you're the first process)
-    if (m_prank>0){
-	MPI_Isend(&u(0, 0), m_global_n, MPI_DOUBLE, prev, 13, MPI_COMM_WORLD, &request_ns);
-	MPI_Recv(&u(1, 0), m_global_n, MPI_DOUBLE, prev, 13,  MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-	}
-
-    //Send, recieve data to/from next process (unless you're the last process)
-    if (m_prank < m_psize){
-    if (m_prank != 0 && m_prank < m_psize-1){
-	MPI_Isend(&u(m_global_m-1, 0), m_global_n, MPI_DOUBLE, next, 13, MPI_COMM_WORLD, &request_ps);
-	MPI_Recv(&u(m_global_m, 0), m_global_n, MPI_DOUBLE, next, 13, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-	}
-    
-    /// Wait until data has been sent/recieved (This can be optimized)
-    MPI_Wait(&request_ns, MPI_STATUS_IGNORE);
-    MPI_Wait(&request_ps, MPI_STATUS_IGNORE);
-*/
+    Grid & u = m_grids.current();
 
     /* ATTEMPTED SYNCHRONOUS VERSION */
+
+    // Exchange information with block above (unless you're the first block)
     if (m_prank > 0){
-	std::cout << "Process " << m_prank << " about to exchange information with his predecessor " << prev << std::endl << std::flush;
-        MPI_Sendrecv(&uo(0, 0), m_global_n, MPI_DOUBLE, prev, 13,
-		&uo(1, 0), m_global_n, MPI_DOUBLE, prev, 13,
-		MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-        std::cout << "Process " << m_prank << " has recieved data from predecessor " << prev << std::endl << std::flush;
+
+      // Prepare send, receive buffers (contiguous staging copies of one row;
+      // std::vector frees them when this block ends)
+      std::vector<float> send(m_global_n);
+      std::vector<float> recv(m_global_n);
+
+      // Row 1 is sent column by column: every entry needs its own column index.
+      for (int i = 0; i < m_global_n; i++) {
+        send[i] = u(1, i);
 	}
-  
-    if (m_prank < m_psize-1){
-        std::cout << "Process " << m_prank << " about to exchange information with successor " << next << std::endl << std::flush;
-	MPI_Sendrecv(&uo(m_global_m-1, 0), m_global_n, MPI_DOUBLE, next, 13,
-		&uo(m_global_m, 0), m_global_n, MPI_DOUBLE, next, 13,
+
+      // Send/receive buffer content
+      MPI_Sendrecv(send.data(), m_global_n, MPI_FLOAT, prev, 13,
+                   recv.data(), m_global_n, MPI_FLOAT, prev, 13,
 		MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-	std::cout << "Process " << m_prank << " has recieved data from successor " << next << std::endl << std::flush;
-	}
+
+      // Accept data
+      for (int i = 0; i < m_global_n; i++) {
+        u(0, i) = recv[i];
+      }
+
+      // send and recv are std::vectors, so their storage is released when
+      // this block ends; no delete[] is needed.
+
+    }
+
+    // Exchange information with block below (unless you're the last block)
+    if (m_prank < m_psize - 1){
+
+      // Prepare send, receive buffers
+      std::vector<float> send(m_global_n);
+      std::vector<float> recv(m_global_n);
+
+      // Copy row m_global_m-2 column by column. Nothing is printed per
+      // element: stdout is reserved for the result line that the driver
+      // writes on rank 0.
+      for (int i = 0; i < m_global_n; i++) {
+        send[i] = u(m_global_m-2, i);
+      }
+
+      // Send/receive buffer content
+      MPI_Sendrecv(send.data(), m_global_n, MPI_FLOAT, next, 13,
+                   recv.data(), m_global_n, MPI_FLOAT, next, 13,
+                   MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+      // Accept data
+      for (int i = 0; i < m_global_n; i++) {
+        u(m_global_m-1, i) = recv[i];
+      }
+
+      // The buffers are released automatically when this block ends.
+
+    }
+
+    // NOTE(review): l2 is the residual of this rank's slice only; it is not
+    // reduced across ranks before being returned.
+
+    // Swap the double buffer before the next iteration.
+
+    m_grids.swap();
     ++s;
   } while (s < m_n_iter);
 
-//  m_dumper->dump(s);
-
   return std::make_tuple(l2, s);
 }
 
 /* -------------------------------------------------------------------------- */
 float Simulation::compute_step() {
   float l2 = 0.;
 
   Grid & u = m_grids.current();
   Grid & uo = m_grids.old();
 
   for (int i = 1; i < m_global_m - 1; i++) {
     for (int j = 1; j < m_global_n - 1; j++) {
       // computation of the new step
       u(i, j) = 0.25 * (uo(i - 1, j) + uo(i + 1, j) + uo(i, j - 1) +
                         uo(i, j + 1) - m_f(i, j) * m_h_m * m_h_n);
 
       // L2 norm
       l2 += (uo(i, j) - u(i, j)) * (uo(i, j) - u(i, j));
     }
   }
 
   return l2;
 }
diff --git a/poisson_wip/smartloop.sh b/poisson_wip/smartloop.sh
new file mode 100644
index 0000000..e2bbc14
--- /dev/null
+++ b/poisson_wip/smartloop.sh
@@ -0,0 +1,38 @@
+#!/bin/bash -l
+
+# Strong-scaling sweep: every grid size is run on every process count.
+# The enclosing allocation must provide at least 56 tasks (e.g. sbatch -n 56).
+
+# Clean terminal and pwd (-f: no error when there is nothing to delete)
+clear
+rm -f slurm-*
+
+# Compilation; stop if the build fails so no stale binary is benchmarked
+echo Compiling...
+make clean
+make || exit 1
+
+# Prepare collection file
+echo Preparing collection file...
+: > out.txt
+
+# Run programs
+echo Running...
+
+for ppdir in 512 1024 2048 4096
+do
+	for numproc in 1 2 4 8 16 32 56
+	do
+		echo "$ppdir points and $numproc processes"
+		# srun needs -n here: an "#SBATCH" line inside a loop is just a comment.
+		srun -n "$numproc" poisson "$ppdir" 50 >> out.txt
+	done
+done
+
+# Collect output
+echo Collecting output
+cat slurm-* > outs.txt 2>/dev/null
+cat out.txt
+cat outs.txt
+
+echo Done!