diff --git a/mpi/.solutions/Makefile b/mpi/.solutions/Makefile
index 7d59afd..5b16822 100644
--- a/mpi/.solutions/Makefile
+++ b/mpi/.solutions/Makefile
@@ -1,17 +1,17 @@
 OPTIM+=-O3
 CXX=mpicxx
 CC=mpicxx
 LD=${CXX}
 CXXFLAGS+=-Wall -Wextra -std=c++11 $(OPTIM)
 LDFLAGS+=$(OPTIM) -lm
-EXECS=pi pi_p2p_ring pi_p2p_async_ring pi_gather pi_reduce pi_io_at pi_hybrid pi_p2p_sendrecv_ring
+EXECS=pi pi_p2p_ring pi_p2p_async_ring pi_gather pi_reduce pi_io_at pi_hybrid pi_p2p_sendrecv_ring pi_p2p_permanent_ring pi_p2p_derived_type
 all: clean $(EXECS)
 pi_hybrid: pi_hybrid.cc
 	$(CXX) $(CXXFLAGS) $(LDFLAGS) -fopenmp -o $@ $<
 clean:
 	rm -f $(EXECS) *.o *~
diff --git a/mpi/.solutions/pi_io_at.cc b/mpi/.solutions/pi_io_at.cc
index c560740..6460702 100644
--- a/mpi/.solutions/pi_io_at.cc
+++ b/mpi/.solutions/pi_io_at.cc
@@ -1,80 +1,87 @@
 /* This exercise is taken from the class Parallel Programming Workshop (MPI, OpenMP and Advanced Topics) at HLRS given by Rolf Rabenseifner */
 #include
 #include
 #include
 #include
 #include
 using clk = std::chrono::high_resolution_clock;
 using second = std::chrono::duration;
 using time_point = std::chrono::time_point;
 inline int digit(double x, int n) {
   return std::trunc(x * std::pow(10., n)) - std::trunc(x * std::pow(10., n - 1)) *10.;
 }
 inline double f(double a) { return (4. / (1. + a * a)); }
 const int n = 10000000;
 int main(int /* argc */ , char ** /* argv */) {
   int i;
   double dx, x, sum, pi;
   int psize, prank;
   MPI_Init(NULL, NULL);
   MPI_Comm_size(MPI_COMM_WORLD, &psize);
   MPI_Comm_rank(MPI_COMM_WORLD, &prank);
   auto mpi_t1 = MPI_Wtime();
   auto t1 = clk::now();
   int nlocal = n / psize;
   int istart = 1 + nlocal * prank;
   int iend = nlocal * (prank + 1);
   /* calculate pi = integral [0..1] 4 / (1 + x**2) dx */
   dx = 1. / n;
   sum = 0.0;
   for (i = istart; i <= iend; i++) {
     x = (1. * i - 0.5) * dx;
     sum = sum + f(x);
   }
   MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   pi = dx * sum;
   auto mpi_elapsed = MPI_Wtime() - mpi_t1;
   second elapsed = clk::now() - t1;
   if(prank == 0) {
     std::printf("computed pi = %.16g\n", pi);
     std::printf("wall clock time (mpi_wtime) = %.4gs with %d process\n", mpi_elapsed, psize);
     std::printf("wall clock time (chrono) = %.4gs\n", elapsed.count());
   }
   char zero = '0';
   int ndigits = 16 / psize;
   int dstart = ndigits * prank;
   std::vector digits(ndigits);
-  for(int d = 0; d < ndigits; ++d) {
+  for (int d = 0; d < ndigits; ++d) {
     digits[d] = zero + digit(pi, dstart + d);
   }
+  // open pi.dat on all ranks of MPI_COMM_WORLD, write-only, creating it if needed
   MPI_File file;
-  MPI_File_open(MPI_COMM_WORLD, "pi.dat", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file);
-  MPI_File_set_size(file, 0);
-  MPI_File_write_at(file, dstart, digits.data(), digits.size(), MPI_CHAR, MPI_STATUS_IGNORE);
+  MPI_File_open(MPI_COMM_WORLD, "pi.dat", MPI_MODE_WRONLY | MPI_MODE_CREATE,
+                MPI_INFO_NULL, &file);
+  MPI_File_set_size(file, 16);  // the ranks together write at most 16 characters
+
+  // each rank writes its ndigits characters at offset dstart with MPI_File_write_at
+  MPI_File_write_at(file, dstart, digits.data(), digits.size(), MPI_CHAR,
+                    MPI_STATUS_IGNORE);
+
+  // close the file
   MPI_File_close(&file);
   MPI_Finalize();
   return 0;
 }
diff --git a/mpi/.solutions/pi_io_at.cc b/mpi/.solutions/pi_p2p_derived_type.cc
similarity index 50%
copy from mpi/.solutions/pi_io_at.cc
copy to mpi/.solutions/pi_p2p_derived_type.cc
index c560740..02407c0 100644
--- a/mpi/.solutions/pi_io_at.cc
+++ b/mpi/.solutions/pi_p2p_derived_type.cc
@@ -1,80 +1,116 @@
 /* This exercise is taken from the class Parallel Programming Workshop (MPI, OpenMP and Advanced Topics) at HLRS given by Rolf Rabenseifner */
 #include
 #include
 #include
 #include
-#include
 using clk = std::chrono::high_resolution_clock;
 using second = std::chrono::duration;
 using time_point = std::chrono::time_point;
 inline int digit(double x, int n) {
   return std::trunc(x * std::pow(10., n)) - std::trunc(x * std::pow(10., n - 1)) *10.;
 }
 inline double f(double a) { return (4. / (1. + a * a)); }
 const int n = 10000000;
 int main(int /* argc */ , char ** /* argv */) {
   int i;
-  double dx, x, sum, pi;
+  double dx, x, pi;
   int psize, prank;
+  // value passed around the ring: a partial sum and the rank it came from
+  struct Sum {
+    double sum;
+    int rank;
+  };
+
   MPI_Init(NULL, NULL);
   MPI_Comm_size(MPI_COMM_WORLD, &psize);
   MPI_Comm_rank(MPI_COMM_WORLD, &prank);
+  Sum sum{0., 0};
+  Sum send, recv;
+
+  // describe Sum to MPI: one MPI_DOUBLE and one MPI_INT at the members' offsets
+  int blk_length[2] = {1, 1};
+
+  MPI_Aint zero_address, first_address, second_address;
+  MPI_Get_address(&send, &zero_address);
+  MPI_Get_address(&send.sum, &first_address);
+  MPI_Get_address(&send.rank, &second_address);
+
+  // both displacements are measured from the start of the struct
+  MPI_Aint displs[2];
+  displs[0] = MPI_Aint_diff(first_address, zero_address);
+  displs[1] = MPI_Aint_diff(second_address, zero_address);
+
+  MPI_Datatype types[2] = {MPI_DOUBLE, MPI_INT};
+  MPI_Datatype sum_t;
+  MPI_Type_create_struct(2, blk_length, displs, types, &sum_t);
+  MPI_Type_commit(&sum_t);
+
   auto mpi_t1 = MPI_Wtime();
   auto t1 = clk::now();
   int nlocal = n / psize;
   int istart = 1 + nlocal * prank;
   int iend = nlocal * (prank + 1);
   /* calculate pi = integral [0..1] 4 / (1 + x**2) dx */
   dx = 1. / n;
-  sum = 0.0;
+
   for (i = istart; i <= iend; i++) {
     x = (1. * i - 0.5) * dx;
-    sum = sum + f(x);
+    sum.sum = sum.sum + f(x);
+  }
+
+  int next = (prank + 1) % psize;
+  int prev = (prank - 1 + psize) % psize;
+
+  // ring exchange: in psize - 1 steps every rank receives and adds each other rank's value
+  MPI_Request request;
+  send = sum;
+  send.rank = prank;
+  for(int s = 1; s < psize; ++s) {
+    MPI_Isend(&send, 1, sum_t, next, 13, MPI_COMM_WORLD, &request);
+    MPI_Recv(&recv, 1, sum_t, prev, 13, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+    sum.sum += recv.sum;
+    sum.rank += recv.rank;
+
+    MPI_Wait(&request, MPI_STATUS_IGNORE);  // send must be complete before it is overwritten
+
+    send = recv;
   }
-  MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  // no MPI_Allreduce here: the ring above already left the complete sum on every
+  // rank, and MPI_SUM is not defined for a struct datatype such as sum_t
-  pi = dx * sum;
+  pi = dx * sum.sum;
   auto mpi_elapsed = MPI_Wtime() - mpi_t1;
   second elapsed = clk::now() - t1;
   if(prank == 0) {
-    std::printf("computed pi = %.16g\n", pi);
+    std::printf("computed pi = %.16g / %d\n", pi, sum.rank);
     std::printf("wall clock time (mpi_wtime) = %.4gs with %d process\n", mpi_elapsed, psize);
     std::printf("wall clock time (chrono) = %.4gs\n", elapsed.count());
-  }
-  char zero = '0';
-  int ndigits = 16 / psize;
-  int dstart = ndigits * prank;
-
-  std::vector digits(ndigits);
-  for(int d = 0; d < ndigits; ++d) {
-    digits[d] = zero + digit(pi, dstart + d);
+    for(int d = 1; d <= 15; ++d) {
+      std::printf("%d", digit(pi, d));
+    }
   }
-  MPI_File file;
-  MPI_File_open(MPI_COMM_WORLD, "pi.dat", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file);
-  MPI_File_set_size(file, 0);
-  MPI_File_write_at(file, dstart, digits.data(), digits.size(), MPI_CHAR, MPI_STATUS_IGNORE);
-  MPI_File_close(&file);
-
+  MPI_Type_free(&sum_t);  // release the committed datatype before finalizing
   MPI_Finalize();
   return 0;
 }
diff --git a/mpi/.solutions/pi_io_at.cc b/mpi/.solutions/pi_p2p_permanent_ring.cc
similarity index 67%
copy from mpi/.solutions/pi_io_at.cc
copy to mpi/.solutions/pi_p2p_permanent_ring.cc
index c560740..a2c6742 100644
--- a/mpi/.solutions/pi_io_at.cc
+++ b/mpi/.solutions/pi_p2p_permanent_ring.cc
@@ -1,80 +1,94 @@
 /* This exercise is taken from the class Parallel Programming Workshop (MPI, OpenMP and Advanced Topics) at HLRS given by Rolf Rabenseifner */
 #include
 #include
 #include
 #include
-#include
 using clk = std::chrono::high_resolution_clock;
 using second = std::chrono::duration;
 using time_point = std::chrono::time_point;
 inline int digit(double x, int n) {
   return std::trunc(x * std::pow(10., n)) - std::trunc(x * std::pow(10., n - 1)) *10.;
 }
 inline double f(double a) { return (4. / (1. + a * a)); }
 const int n = 10000000;
 int main(int /* argc */ , char ** /* argv */) {
   int i;
   double dx, x, sum, pi;
   int psize, prank;
   MPI_Init(NULL, NULL);
   MPI_Comm_size(MPI_COMM_WORLD, &psize);
   MPI_Comm_rank(MPI_COMM_WORLD, &prank);
   auto mpi_t1 = MPI_Wtime();
   auto t1 = clk::now();
   int nlocal = n / psize;
   int istart = 1 + nlocal * prank;
   int iend = nlocal * (prank + 1);
   /* calculate pi = integral [0..1] 4 / (1 + x**2) dx */
   dx = 1. / n;
   sum = 0.0;
   for (i = istart; i <= iend; i++) {
     x = (1. * i - 0.5) * dx;
     sum = sum + f(x);
   }
-  MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  int next = (prank + 1) % psize;
+  int prev = (prank - 1 + psize) % psize;
+
+  double send, recv;
+  MPI_Request request[2];
+
+  send = sum;
+  // the two requests are set up once here and restarted on every ring step below
+  MPI_Send_init(&send, 1, MPI_DOUBLE, next, 13, MPI_COMM_WORLD, request);
+  MPI_Recv_init(&recv, 1, MPI_DOUBLE, prev, 13, MPI_COMM_WORLD, request + 1);
+
+  for(int s = 1; s < psize; ++s) {
+    MPI_Startall(2, request);
+
+    // ensure that receive is finished before using recv
+    MPI_Wait(request + 1, MPI_STATUS_IGNORE);
+
+    sum += recv;
+
+    // ensure that send is finished before its buffer is overwritten
+    MPI_Wait(request, MPI_STATUS_IGNORE);
+
+    send = recv;
+  }
   pi = dx * sum;
+  MPI_Request_free(request);      // release the send request
+  MPI_Request_free(request + 1);  // release the receive request
+
   auto mpi_elapsed = MPI_Wtime() - mpi_t1;
   second elapsed = clk::now() - t1;
   if(prank == 0) {
     std::printf("computed pi = %.16g\n", pi);
     std::printf("wall clock time (mpi_wtime) = %.4gs with %d process\n", mpi_elapsed, psize);
     std::printf("wall clock time (chrono) = %.4gs\n", elapsed.count());
-  }
-
-  char zero = '0';
-  int ndigits = 16 / psize;
-  int dstart = ndigits * prank;
-  std::vector digits(ndigits);
-  for(int d = 0; d < ndigits; ++d) {
-    digits[d] = zero + digit(pi, dstart + d);
+    for(int d = 1; d <= 15; ++d) {
+      std::printf("%d", digit(pi, d));
+    }
   }
-  MPI_File file;
-  MPI_File_open(MPI_COMM_WORLD, "pi.dat", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file);
-  MPI_File_set_size(file, 0);
-  MPI_File_write_at(file, dstart, digits.data(), digits.size(), MPI_CHAR, MPI_STATUS_IGNORE);
-  MPI_File_close(&file);
-
   MPI_Finalize();
   return 0;
 }