diff --git a/mpi/.solutions/Makefile b/mpi/.solutions/Makefile
index 7d59afd..5b16822 100644
--- a/mpi/.solutions/Makefile
+++ b/mpi/.solutions/Makefile
@@ -1,17 +1,17 @@
 OPTIM+=-O3
 
 CXX=mpicxx
 CC=mpicxx
 LD=${CXX}
 CXXFLAGS+=-Wall -Wextra -std=c++11 $(OPTIM)
 LDFLAGS+=$(OPTIM) -lm
 
-EXECS=pi pi_p2p_ring pi_p2p_async_ring pi_gather pi_reduce pi_io_at pi_hybrid pi_p2p_sendrecv_ring
+EXECS=pi pi_p2p_ring pi_p2p_async_ring pi_gather pi_reduce pi_io_at pi_hybrid pi_p2p_sendrecv_ring pi_p2p_permanent_ring pi_p2p_derived_type
 
 all: clean $(EXECS)
 
 pi_hybrid: pi_hybrid.cc
 	$(CXX) $(CXXFLAGS) $(LDFLAGS) -fopenmp -o $@ $<
 
 clean:
 	rm -f $(EXECS) *.o *~
diff --git a/mpi/.solutions/pi_io_at.cc b/mpi/.solutions/pi_io_at.cc
index c560740..6460702 100644
--- a/mpi/.solutions/pi_io_at.cc
+++ b/mpi/.solutions/pi_io_at.cc
@@ -1,80 +1,87 @@
 /*
   This exercise is taken from the class Parallel Programming Workshop (MPI,
   OpenMP and Advanced Topics) at HLRS given by Rolf Rabenseifner
  */
 
 #include <chrono>
 #include <cstdio>
 #include <cmath>
 #include <mpi.h>
 #include <vector>
 
 using clk = std::chrono::high_resolution_clock;
 using second = std::chrono::duration<double>;
 using time_point = std::chrono::time_point<clk>;
 
 inline int digit(double x, int n) {
   return std::trunc(x * std::pow(10., n)) - std::trunc(x * std::pow(10., n - 1)) *10.;
 }
 
 inline double f(double a) { return (4. / (1. + a * a)); }
 
 const int n = 10000000;
 
 int main(int /* argc */ , char ** /* argv */) {
   int i;
   double dx, x, sum, pi;
   int psize, prank;
 
   MPI_Init(NULL, NULL);
 
   MPI_Comm_size(MPI_COMM_WORLD, &psize);
   MPI_Comm_rank(MPI_COMM_WORLD, &prank);
 
   auto mpi_t1 = MPI_Wtime();
   auto t1 = clk::now();
 
   int nlocal = n / psize;
   int istart = 1 + nlocal * prank;
   int iend = nlocal * (prank + 1);
 
   /* calculate pi = integral [0..1] 4 / (1 + x**2) dx */
   dx = 1. / n;
   sum = 0.0;
   for (i = istart; i <= iend; i++) {
     x = (1. * i - 0.5) * dx;
     sum = sum + f(x);
   }
 
   MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 
   pi = dx * sum;
 
   auto mpi_elapsed = MPI_Wtime() - mpi_t1;
   second elapsed = clk::now() - t1;
 
   if(prank == 0) {
     std::printf("computed pi                 = %.16g\n", pi);
     std::printf("wall clock time (mpi_wtime) = %.4gs with %d process\n", mpi_elapsed, psize);
     std::printf("wall clock time (chrono)    = %.4gs\n", elapsed.count());
   }
 
   char zero = '0';
   int ndigits = 16 / psize;
   int dstart = ndigits * prank;
 
   std::vector<char> digits(ndigits);
-  for(int d = 0; d < ndigits; ++d) {
+  for (int d = 0; d < ndigits; ++d) {
     digits[d] = zero + digit(pi, dstart + d);
   }
 
+  // open (creating it if needed) the shared output file on all ranks
   MPI_File file;
-  MPI_File_open(MPI_COMM_WORLD, "pi.dat", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file);
-  MPI_File_set_size(file, 0);
-  MPI_File_write_at(file, dstart, digits.data(), digits.size(), MPI_CHAR, MPI_STATUS_IGNORE);
+  MPI_File_open(MPI_COMM_WORLD, "pi.dat", MPI_MODE_WRONLY | MPI_MODE_CREATE,
+                MPI_INFO_NULL, &file);
+  // size the file to exactly the bytes written (16 / psize may truncate)
+  MPI_File_set_size(file, static_cast<MPI_Offset>(ndigits) * psize);
+  // every rank writes its own digits at byte offset dstart
+  MPI_File_write_at(file, dstart, digits.data(), digits.size(), MPI_CHAR,
+                    MPI_STATUS_IGNORE);
+
+  // close the file
   MPI_File_close(&file);
 
   MPI_Finalize();
 
   return 0;
 }
diff --git a/mpi/.solutions/pi_io_at.cc b/mpi/.solutions/pi_p2p_derived_type.cc
similarity index 50%
copy from mpi/.solutions/pi_io_at.cc
copy to mpi/.solutions/pi_p2p_derived_type.cc
index c560740..02407c0 100644
--- a/mpi/.solutions/pi_io_at.cc
+++ b/mpi/.solutions/pi_p2p_derived_type.cc
@@ -1,80 +1,110 @@
 /*
   This exercise is taken from the class Parallel Programming Workshop (MPI,
   OpenMP and Advanced Topics) at HLRS given by Rolf Rabenseifner
  */
 
 #include <chrono>
 #include <cstdio>
 #include <cmath>
 #include <mpi.h>
-#include <vector>
 
 using clk = std::chrono::high_resolution_clock;
 using second = std::chrono::duration<double>;
 using time_point = std::chrono::time_point<clk>;
 
 inline int digit(double x, int n) {
   return std::trunc(x * std::pow(10., n)) - std::trunc(x * std::pow(10., n - 1)) *10.;
 }
 
 inline double f(double a) { return (4. / (1. + a * a)); }
 
 const int n = 10000000;
 
 int main(int /* argc */ , char ** /* argv */) {
   int i;
-  double dx, x, sum, pi;
+  double dx, x, pi;
   int psize, prank;
 
+  struct Sum {
+    double sum;
+    int rank;
+  };
+
   MPI_Init(NULL, NULL);
 
   MPI_Comm_size(MPI_COMM_WORLD, &psize);
   MPI_Comm_rank(MPI_COMM_WORLD, &prank);
 
+  // seed the rank tally with our own rank so it ends up as the sum of all ranks
+  Sum sum{0., prank};
+  Sum send, recv;
+
+  // describe Sum to MPI: one double and one int, displaced from the struct start
+  int blk_length[2] = {1, 1};
+  MPI_Aint zero_address, first_address, second_address;
+  MPI_Get_address(&send, &zero_address);
+  MPI_Get_address(&send.sum, &first_address);
+  MPI_Get_address(&send.rank, &second_address);
+  MPI_Aint displs[2];
+  displs[0] = MPI_Aint_diff(first_address, zero_address);
+  displs[1] = MPI_Aint_diff(second_address, zero_address);
+
+  MPI_Datatype types[2] = {MPI_DOUBLE, MPI_INT};
+  MPI_Datatype sum_t;
+  MPI_Type_create_struct(2, blk_length, displs, types, &sum_t);
+  MPI_Type_commit(&sum_t);
+
   auto mpi_t1 = MPI_Wtime();
   auto t1 = clk::now();
 
   int nlocal = n / psize;
   int istart = 1 + nlocal * prank;
   int iend = nlocal * (prank + 1);
 
   /* calculate pi = integral [0..1] 4 / (1 + x**2) dx */
   dx = 1. / n;
-  sum = 0.0;
+
   for (i = istart; i <= iend; i++) {
     x = (1. * i - 0.5) * dx;
-    sum = sum + f(x);
+    sum.sum = sum.sum + f(x);
+  }
+
+  int next = (prank + 1) % psize;
+  int prev = (prank - 1 + psize) % psize;
+  // pass partial sums around the ring: after psize - 1 steps every rank holds
+  // the global total, so no collective reduction is needed afterwards
+  MPI_Request request;
+  send = sum;
+  send.rank = prank;
+  for(int s = 1; s < psize; ++s) {
+    MPI_Isend(&send, 1, sum_t, next, 13, MPI_COMM_WORLD, &request);
+    MPI_Recv(&recv, 1, sum_t, prev, 13, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+    sum.sum += recv.sum;
+    sum.rank += recv.rank;
+    // the send buffer may only be overwritten once the send has completed
+    MPI_Wait(&request, MPI_STATUS_IGNORE);
+    send = recv;
   }
 
-  MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  MPI_Type_free(&sum_t);  // all transfers using sum_t are complete
 
-  pi = dx * sum;
+  pi = dx * sum.sum;
 
   auto mpi_elapsed = MPI_Wtime() - mpi_t1;
   second elapsed = clk::now() - t1;
 
   if(prank == 0) {
-    std::printf("computed pi                 = %.16g\n", pi);
+    std::printf("computed pi                 = %.16g / %d\n", pi, sum.rank);
     std::printf("wall clock time (mpi_wtime) = %.4gs with %d process\n", mpi_elapsed, psize);
     std::printf("wall clock time (chrono)    = %.4gs\n", elapsed.count());
-  }
 
-  char zero = '0';
-  int ndigits = 16 / psize;
-  int dstart = ndigits * prank;
-
-  std::vector<char> digits(ndigits);
-  for(int d = 0; d < ndigits; ++d) {
-    digits[d] = zero + digit(pi, dstart + d);
+    for(int d = 1; d <= 15; ++d) {
+      std::printf("%d", digit(pi, d));
+    }
   }
 
-  MPI_File file;
-  MPI_File_open(MPI_COMM_WORLD, "pi.dat", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file);
-  MPI_File_set_size(file, 0);
-  MPI_File_write_at(file, dstart, digits.data(), digits.size(), MPI_CHAR, MPI_STATUS_IGNORE);
-  MPI_File_close(&file);
-
   MPI_Finalize();
 
   return 0;
 }
diff --git a/mpi/.solutions/pi_io_at.cc b/mpi/.solutions/pi_p2p_permanent_ring.cc
similarity index 67%
copy from mpi/.solutions/pi_io_at.cc
copy to mpi/.solutions/pi_p2p_permanent_ring.cc
index c560740..a2c6742 100644
--- a/mpi/.solutions/pi_io_at.cc
+++ b/mpi/.solutions/pi_p2p_permanent_ring.cc
@@ -1,80 +1,93 @@
 /*
   This exercise is taken from the class Parallel Programming Workshop (MPI,
   OpenMP and Advanced Topics) at HLRS given by Rolf Rabenseifner
  */
 
 #include <chrono>
 #include <cstdio>
 #include <cmath>
 #include <mpi.h>
-#include <vector>
 
 using clk = std::chrono::high_resolution_clock;
 using second = std::chrono::duration<double>;
 using time_point = std::chrono::time_point<clk>;
 
 inline int digit(double x, int n) {
   return std::trunc(x * std::pow(10., n)) - std::trunc(x * std::pow(10., n - 1)) *10.;
 }
 
 inline double f(double a) { return (4. / (1. + a * a)); }
 
 const int n = 10000000;
 
 int main(int /* argc */ , char ** /* argv */) {
   int i;
   double dx, x, sum, pi;
   int psize, prank;
 
   MPI_Init(NULL, NULL);
 
   MPI_Comm_size(MPI_COMM_WORLD, &psize);
   MPI_Comm_rank(MPI_COMM_WORLD, &prank);
 
   auto mpi_t1 = MPI_Wtime();
   auto t1 = clk::now();
 
   int nlocal = n / psize;
   int istart = 1 + nlocal * prank;
   int iend = nlocal * (prank + 1);
 
   /* calculate pi = integral [0..1] 4 / (1 + x**2) dx */
   dx = 1. / n;
   sum = 0.0;
   for (i = istart; i <= iend; i++) {
     x = (1. * i - 0.5) * dx;
     sum = sum + f(x);
   }
 
-  MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  int next = (prank + 1) % psize;
+  int prev = (prank - 1 + psize) % psize;
+  // buffers bound to the persistent requests: request[0] sends, request[1] receives
+  double send, recv;
+  MPI_Request request[2];
+  // set up the send/receive pair once; each MPI_Startall below starts it again
+  send = sum;
+  MPI_Send_init(&send, 1, MPI_DOUBLE, next, 13, MPI_COMM_WORLD, request);
+  MPI_Recv_init(&recv, 1, MPI_DOUBLE, prev, 13, MPI_COMM_WORLD, request + 1);
+
+  for(int s = 1; s < psize; ++s) {
+    MPI_Startall(2, request);
+
+    // ensure that receive is finished before using recv
+    MPI_Wait(request + 1, MPI_STATUS_IGNORE);
+
+    sum += recv;
+
+    // ensure that send is finished before overwriting the send buffer
+    MPI_Wait(request, MPI_STATUS_IGNORE);
+
+    send = recv;
+  }
 
   pi = dx * sum;
 
+  MPI_Request_free(request);
+  MPI_Request_free(request + 1);
+
   auto mpi_elapsed = MPI_Wtime() - mpi_t1;
   second elapsed = clk::now() - t1;
 
   if(prank == 0) {
     std::printf("computed pi                 = %.16g\n", pi);
     std::printf("wall clock time (mpi_wtime) = %.4gs with %d process\n", mpi_elapsed, psize);
     std::printf("wall clock time (chrono)    = %.4gs\n", elapsed.count());
-  }
-
-  char zero = '0';
-  int ndigits = 16 / psize;
-  int dstart = ndigits * prank;
 
-  std::vector<char> digits(ndigits);
-  for(int d = 0; d < ndigits; ++d) {
-    digits[d] = zero + digit(pi, dstart + d);
+    for(int d = 1; d <= 15; ++d) {
+      std::printf("%d", digit(pi, d));
+    }
   }
 
-  MPI_File file;
-  MPI_File_open(MPI_COMM_WORLD, "pi.dat", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file);
-  MPI_File_set_size(file, 0);
-  MPI_File_write_at(file, dstart, digits.data(), digits.size(), MPI_CHAR, MPI_STATUS_IGNORE);
-  MPI_File_close(&file);
-
   MPI_Finalize();
 
   return 0;
 }