diff --git a/pi/solutions-mpi/Makefile b/pi/solutions-mpi/Makefile
index 788c84f..fc4e726 100644
--- a/pi/solutions-mpi/Makefile
+++ b/pi/solutions-mpi/Makefile
@@ -1,12 +1,12 @@
-OPTIM+=-O3 -march=native
+OPTIM ?=-O3 -march=native
 CXX=mpicxx
 CC=mpicxx
 LD=${CXX}
 CXXFLAGS+=-Wall -Wextra -std=c++11 $(OPTIM)
 LDFLAGS+=-lm
-EXE=pi_p2p_async.o pi_gather_bcast.o pi_reduce.o pi_p2p_sendrecv.o pi_p2p_sync.o
+EXE=pi_p2p_async pi_gather_bcast pi_reduce pi_p2p_sendrecv pi_p2p_sync pi_p2p_persistent pi_1sided_put pi_1sided_get pi_io_at
 
 all: clean $(EXE)
 
 clean:
 	rm -f $(EXE) *.o *~
diff --git a/pi/solutions-mpi/pi_1sided_get.cc b/pi/solutions-mpi/pi_1sided_get.cc
new file mode 100644
index 0000000..2c960a0
--- /dev/null
+++ b/pi/solutions-mpi/pi_1sided_get.cc
@@ -0,0 +1,84 @@
+/*
+  This exercise is taken from the class Parallel Programming Workshop (MPI,
+  OpenMP and Advanced Topics) at HLRS given by Rolf Rabenseifner
+ */
+#include <chrono>
+#include <cmath>
+#include <cstdio>
+#include <mpi.h>
+
+using clk = std::chrono::high_resolution_clock;
+using second = std::chrono::duration<double>;
+using time_point = std::chrono::time_point<clk>;
+
+inline int digit(double x, int n) {
+  return std::trunc(x * std::pow(10., n)) - std::trunc(x * std::pow(10., n - 1)) * 10.;
+}
+
+inline double f(double a) { return (4. / (1. + a * a)); }
+
+const int n = 10000000;
+
+int main(int /* argc */, char ** /* argv */) {
+  int i;
+  double dx, x, sum, pi;
+
+  MPI_Init(NULL, NULL);
+  int prank, psize;
+  MPI_Comm_rank(MPI_COMM_WORLD, &prank);
+  MPI_Comm_size(MPI_COMM_WORLD, &psize);
+  auto t1 = clk::now();
+
+  auto ln = n / psize + (prank < n % psize ? 1 : 0);
+
+  auto i_start = prank * ln + (prank < n % psize ? 0 : n % psize);
+  auto i_end = i_start + ln;
+
+  /* calculate pi = integral [0..1] 4 / (1 + x**2) dx */
+  dx = 1. / n;
+  double lsum = 0.0;
+  for (i = i_start; i < i_end; i++) {
+    x = 1. * i * dx;
+    lsum = lsum + f(x);
+  }
+
+  double send, recv;
+
+  //auto next = (prank + 1) % psize;
+  auto prev = (prank - 1 + psize) % psize;
+
+  MPI_Win win;
+  MPI_Win_create(&send, static_cast<MPI_Aint>(sizeof(double)), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
+
+  send = sum = lsum;
+
+  for (int p = 0; p < psize - 1; ++p) {
+    MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, win);
+    MPI_Get(&recv, 1, MPI_DOUBLE, prev, static_cast<MPI_Aint>(0), 1, MPI_DOUBLE, win);
+    MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, win);
+
+    sum += recv;
+    send = recv;
+  }
+
+  pi = dx * sum;
+
+  MPI_Win_free(&win);
+
+  second elapsed = clk::now() - t1;
+
+  if (prank == 0) {
+    std::printf("computed pi = %.16g\n", pi);
+    std::printf("wall clock time (chrono) = %.4gs\n", elapsed.count());
+    std::printf("n mpi process = %d\n", psize);
+
+
+    for(int d = 1; d <= 15; ++d) {
+      std::printf("%d", digit(pi, d));
+    }
+  }
+
+  MPI_Finalize();
+
+  return 0;
+}
diff --git a/pi/solutions-mpi/pi_1sided_put.cc b/pi/solutions-mpi/pi_1sided_put.cc
new file mode 100644
index 0000000..288618a
--- /dev/null
+++ b/pi/solutions-mpi/pi_1sided_put.cc
@@ -0,0 +1,84 @@
+/*
+  This exercise is taken from the class Parallel Programming Workshop (MPI,
+  OpenMP and Advanced Topics) at HLRS given by Rolf Rabenseifner
+ */
+#include <chrono>
+#include <cmath>
+#include <cstdio>
+#include <mpi.h>
+
+using clk = std::chrono::high_resolution_clock;
+using second = std::chrono::duration<double>;
+using time_point = std::chrono::time_point<clk>;
+
+inline int digit(double x, int n) {
+  return std::trunc(x * std::pow(10., n)) - std::trunc(x * std::pow(10., n - 1)) * 10.;
+}
+
+inline double f(double a) { return (4. / (1.
+ a * a)); }
+
+const int n = 10000000;
+
+int main(int /* argc */, char ** /* argv */) {
+  int i;
+  double dx, x, sum, pi;
+
+  MPI_Init(NULL, NULL);
+  int prank, psize;
+  MPI_Comm_rank(MPI_COMM_WORLD, &prank);
+  MPI_Comm_size(MPI_COMM_WORLD, &psize);
+  auto t1 = clk::now();
+
+  auto ln = n / psize + (prank < n % psize ? 1 : 0);
+
+  auto i_start = prank * ln + (prank < n % psize ? 0 : n % psize);
+  auto i_end = i_start + ln;
+
+  /* calculate pi = integral [0..1] 4 / (1 + x**2) dx */
+  dx = 1. / n;
+  double lsum = 0.0;
+  for (i = i_start; i < i_end; i++) {
+    x = 1. * i * dx;
+    lsum = lsum + f(x);
+  }
+
+  double send, recv;
+
+  send = sum = lsum;
+
+  auto next = (prank + 1) % psize;
+  //auto prev = (prank - 1 + psize) % psize;
+
+  MPI_Win win;
+  MPI_Win_create(&recv, static_cast<MPI_Aint>(sizeof(double)), sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
+
+  for (int p = 0; p < psize - 1; ++p) {
+    MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPRECEDE, win);
+    MPI_Put(&send, 1, MPI_DOUBLE, next, static_cast<MPI_Aint>(0), 1, MPI_DOUBLE, win);
+    MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, win);
+
+    sum += recv;
+    send = recv;
+  }
+
+  pi = dx * sum;
+
+  MPI_Win_free(&win);
+
+  second elapsed = clk::now() - t1;
+
+  if (prank == 0) {
+    std::printf("computed pi = %.16g\n", pi);
+    std::printf("wall clock time (chrono) = %.4gs\n", elapsed.count());
+    std::printf("n mpi process = %d\n", psize);
+
+
+    for(int d = 1; d <= 15; ++d) {
+      std::printf("%d", digit(pi, d));
+    }
+  }
+
+  MPI_Finalize();
+
+  return 0;
+}
diff --git a/pi/solutions-mpi/pi_gather_bcast.cc b/pi/solutions-mpi/pi_gather_bcast.cc
new file mode 100644
index 0000000..f488652
--- /dev/null
+++ b/pi/solutions-mpi/pi_gather_bcast.cc
@@ -0,0 +1,78 @@
+/*
+  This exercise is taken from the class Parallel Programming Workshop (MPI,
+  OpenMP and Advanced Topics) at HLRS given by Rolf Rabenseifner
+ */
+#include <chrono>
+#include <cmath>
+#include <cstdio>
+#include <numeric>
+#include <vector>
+#include <mpi.h>
+
+using clk = std::chrono::high_resolution_clock;
+using second = std::chrono::duration<double>;
+using time_point = std::chrono::time_point<clk>;
+
+inline int digit(double x, int n) {
+  return std::trunc(x * std::pow(10., n)) - std::trunc(x * std::pow(10., n - 1)) * 10.;
+}
+
+inline double f(double a) { return (4. / (1. + a * a)); }
+
+const int n = 10000000;
+
+int main(int /* argc */, char ** /* argv */) {
+  int i;
+  double dx, x, sum, pi;
+
+  MPI_Init(NULL, NULL);
+  int prank, psize;
+  MPI_Comm_rank(MPI_COMM_WORLD, &prank);
+  MPI_Comm_size(MPI_COMM_WORLD, &psize);
+  auto t1 = clk::now();
+
+  auto ln = n / psize + (prank < n % psize ? 1 : 0);
+
+  auto i_start = prank * ln + (prank < n % psize ? 0 : n % psize);
+  auto i_end = i_start + ln;
+
+  /* calculate pi = integral [0..1] 4 / (1 + x**2) dx */
+  dx = 1. / n;
+  double lsum = 0.0;
+  for (i = i_start; i < i_end; i++) {
+    x = 1.
* i * dx;
+    lsum = lsum + f(x);
+  }
+
+  std::vector<double> partial_sums;
+  if (prank == 0) {
+    partial_sums.resize(psize);
+  }
+
+  MPI_Gather(&lsum, 1, MPI_DOUBLE, partial_sums.data(), 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  if (prank == 0) {
+    sum = std::accumulate(partial_sums.begin(), partial_sums.end(), 0.);
+  }
+
+  MPI_Bcast(&sum, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+
+  pi = dx * sum;
+
+  second elapsed = clk::now() - t1;
+
+  if (prank == 0) {
+    std::printf("computed pi = %.16g\n", pi);
+    std::printf("wall clock time (chrono) = %.4gs\n", elapsed.count());
+    std::printf("n mpi process = %d\n", psize);
+
+
+    for(int d = 1; d <= 15; ++d) {
+      std::printf("%d", digit(pi, d));
+    }
+  }
+
+  MPI_Finalize();
+
+  return 0;
+}
diff --git a/pi/solutions-mpi/pi_io_at.cc b/pi/solutions-mpi/pi_io_at.cc
new file mode 100644
index 0000000..6460702
--- /dev/null
+++ b/pi/solutions-mpi/pi_io_at.cc
@@ -0,0 +1,87 @@
+/*
+  This exercise is taken from the class Parallel Programming Workshop (MPI,
+  OpenMP and Advanced Topics) at HLRS given by Rolf Rabenseifner
+ */
+
+#include <chrono>
+#include <cmath>
+#include <cstdio>
+#include <vector>
+#include <mpi.h>
+
+using clk = std::chrono::high_resolution_clock;
+using second = std::chrono::duration<double>;
+using time_point = std::chrono::time_point<clk>;
+
+inline int digit(double x, int n) {
+  return std::trunc(x * std::pow(10., n)) - std::trunc(x * std::pow(10., n - 1)) * 10.;
+}
+
+inline double f(double a) { return (4. / (1. + a * a)); }
+
+const int n = 10000000;
+
+int main(int /* argc */, char ** /* argv */) {
+  int i;
+  double dx, x, sum, pi;
+  int psize, prank;
+
+  MPI_Init(NULL, NULL);
+
+  MPI_Comm_size(MPI_COMM_WORLD, &psize);
+  MPI_Comm_rank(MPI_COMM_WORLD, &prank);
+
+  auto mpi_t1 = MPI_Wtime();
+  auto t1 = clk::now();
+
+  int nlocal = n / psize;
+  int istart = 1 + nlocal * prank;
+  int iend = nlocal * (prank + 1);
+
+  /* calculate pi = integral [0..1] 4 / (1 + x**2) dx */
+  dx = 1. / n;
+  sum = 0.0;
+  for (i = istart; i <= iend; i++) {
+    x = (1.
* i - 0.5) * dx; + sum = sum + f(x); + } + + MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + pi = dx * sum; + + auto mpi_elapsed = MPI_Wtime() - mpi_t1; + second elapsed = clk::now() - t1; + + if(prank == 0) { + std::printf("computed pi = %.16g\n", pi); + std::printf("wall clock time (mpi_wtime) = %.4gs with %d process\n", mpi_elapsed, psize); + std::printf("wall clock time (chrono) = %.4gs\n", elapsed.count()); + } + + char zero = '0'; + int ndigits = 16 / psize; + int dstart = ndigits * prank; + + std::vector digits(ndigits); + for (int d = 0; d < ndigits; ++d) { + digits[d] = zero + digit(pi, dstart + d); + } + + // open a file + MPI_File file; + MPI_File_open(MPI_COMM_WORLD, "pi.dat", MPI_MODE_WRONLY | MPI_MODE_CREATE, + MPI_INFO_NULL, &file); + MPI_File_set_size(file, 16); + + // write the vector with MPI_File_write_at + MPI_File_write_at(file, dstart, digits.data(), digits.size(), MPI_CHAR, + MPI_STATUS_IGNORE); + + // close the file + MPI_File_close(&file); + + MPI_Finalize(); + + return 0; +} diff --git a/pi/solutions-mpi/pi_p2p_derived_type.cc b/pi/solutions-mpi/pi_p2p_derived_type.cc new file mode 100644 index 0000000..67ff97e --- /dev/null +++ b/pi/solutions-mpi/pi_p2p_derived_type.cc @@ -0,0 +1,133 @@ +/* + This exercise is taken from the class Parallel Programming Workshop (MPI, + OpenMP and Advanced Topics) at HLRS given by Rolf Rabenseifner + */ +#include +#include +#include +#include + +using clk = std::chrono::high_resolution_clock; +using second = std::chrono::duration; +using time_point = std::chrono::time_point; + +inline int digit(double x, int n) { + return std::trunc(x * std::pow(10., n)) - std::trunc(x * std::pow(10., n - 1)) *10.; +} + +inline double f(double a) { return (4. / (1. + a * a)); } + +const int n = 10000000; + +struct Sum { + double sum; + int rank; + + Sum() = default; + virtual ~Sum() = default; + Sum(Sum && other) noexcept = default; + Sum(const Sum & other) = default; + Sum & operator=(Sum && other) noexcept = default; + Sum & operator=(const Sum & other) = default; + + Sum &operator+=(const Sum & other) { + sum += other.sum; + rank += other.rank; + return *this; + } +}; + +int main(int /* argc */ , char ** /* argv */) { + int i; + double dx, x, pi; + + MPI_Init(NULL, NULL); + int prank, psize; + MPI_Comm_rank(MPI_COMM_WORLD, &prank); + MPI_Comm_size(MPI_COMM_WORLD, &psize); + auto t1 = clk::now(); + + auto ln = n/psize + (prank < n % psize ? 1 : 0); + + auto i_start = prank * ln + (prank < n % psize ? 0 : n % psize); + auto i_end = i_start + ln; + + /* calculate pi = integral [0..1] 4 / (1 + x**2) dx */ + dx = 1. / n; + double lsum = 0.0; + for (i = i_start; i < i_end; i++) { + x = 1. 
* i * dx; + lsum = lsum + f(x); + } + + Sum send, recv, sum; + Sum send_buff[2]; + + + send.sum = sum.sum = lsum; + send.rank = sum.rank = prank; + + int blk_length[2] = {1, 1}; + + MPI_Aint zero_address, first_address, second_address; + MPI_Get_address(&send, &zero_address); + MPI_Get_address(&send.sum, &first_address); + MPI_Get_address(&send.rank, &second_address); + + MPI_Aint displs[2]; + displs[0] = MPI_Aint_diff(first_address, zero_address);; + displs[1] = MPI_Aint_diff(second_address, zero_address); + + MPI_Datatype types[2] = {MPI_DOUBLE, MPI_INT}; + MPI_Datatype Sum_mpi_t, Sum_array_mpi_t; + MPI_Type_create_struct(2, blk_length, displs, types, &Sum_mpi_t); + MPI_Type_commit(&Sum_mpi_t); + + MPI_Get_address(&(send_buff[0]), &first_address); + MPI_Get_address(&(send_buff[1]), &second_address); + + MPI_Aint lb = 0; + MPI_Aint extent = MPI_Aint_diff(second_address, first_address); + + MPI_Type_create_resized(Sum_mpi_t, lb, extent, &Sum_array_mpi_t); + + MPI_Request request_send; + + auto next = (prank + 1) % psize; + auto prev = (prank - 1 + psize) % psize; + + for (int p = 0 ; p < psize -1; ++p) { + MPI_Isend(&send, 1, Sum_mpi_t, next, 0, MPI_COMM_WORLD, + &request_send); + MPI_Recv(&recv, 1, Sum_mpi_t, prev, 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + sum += recv; + + MPI_Wait(&request_send, MPI_STATUS_IGNORE); + + send = recv; + } + + pi = dx * sum.sum; + + second elapsed = clk::now() - t1; + + if (prank == 0) { + std::printf("computed pi = %.16g\n", pi); + std::printf("sum of ranks = %d\n", sum.rank); + std::printf("wall clock time (chrono) = %.4gs\n", elapsed.count()); + std::printf("n mpi process = %.d\n", psize); + + + for(int d = 1; d <= 15; ++d) { + std::printf("%d", digit(pi, d)); + } + } + + MPI_Type_free(&Sum_mpi_t); + + MPI_Finalize(); + + return 0; +} diff --git a/poisson/solutions-cartesian/AUTHORS b/poisson/solutions-cartesian/AUTHORS new file mode 100644 index 0000000..9253635 --- /dev/null +++ b/poisson/solutions-cartesian/AUTHORS @@ -0,0 +1,4 @@ +Authors: + - Nicolas Richart + - Vincent Keller + . Vittoria Rezzonico diff --git a/poisson/solutions-cartesian/COPYING b/poisson/solutions-cartesian/COPYING new file mode 100644 index 0000000..e551330 --- /dev/null +++ b/poisson/solutions-cartesian/COPYING @@ -0,0 +1,24 @@ +Copyright (c) 2017, Ecole Polytechnique Federal de Lausanne - Scientific IT and +Application Support (SCITAS) All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/poisson/solutions-cartesian/Makefile b/poisson/solutions-cartesian/Makefile new file mode 100644 index 0000000..77a28f6 --- /dev/null +++ b/poisson/solutions-cartesian/Makefile @@ -0,0 +1,21 @@ +CXX=mpic++ +#CXX=g++ +LD=${CXX} +#VERIFY=-fsanitize=thread +VERIFY= -D_DEBUG -D_DEBUG_MPI +OPENMP=-fopenmp +NOOPENMP=-fno-openmp -Wno-unknown-pragmas +#OPTIM+=-O3 -D_MPI $(NOOPENMP) +OPTIM+=-O3 -D_MPI $(NOOPENMP) -march=native +CXXFLAGS+=-Wall -Wextra -Werror -pedantic -std=c++14 -g $(OPTIM) $(VERIFY) +LDFLAGS+=-lm $(CXXFLAGS) + +OBJS=poisson.o grid.o double_buffer.o simulation.o dumpers.o + +all: poisson + +poisson: $(OBJS) + $(LD) -o $@ $(OBJS) $(LDFLAGS) + +clean: + rm -f hello poisson *.o *~ diff --git a/poisson/solutions-cartesian/double_buffer.cc b/poisson/solutions-cartesian/double_buffer.cc new file mode 100644 index 0000000..978eda5 --- /dev/null +++ b/poisson/solutions-cartesian/double_buffer.cc @@ -0,0 +1,31 @@ +/* -------------------------------------------------------------------------- */ +#include "double_buffer.hh" +#include "grid.hh" +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +DoubleBuffer::DoubleBuffer(int m, int n) + : m_current(new Grid(m, n)), m_old(new Grid(m, n)) {} + +/* -------------------------------------------------------------------------- */ +Grid & DoubleBuffer::current() { return *m_current; } + +/* -------------------------------------------------------------------------- */ +Grid & DoubleBuffer::old() { return *m_old; } + +/* -------------------------------------------------------------------------- */ +const Grid & DoubleBuffer::current() const { return *m_current; } + +/* -------------------------------------------------------------------------- */ +const Grid & DoubleBuffer::old() const { return *m_old; } + +/* -------------------------------------------------------------------------- */ +void DoubleBuffer::swap() { + m_current.swap(m_old); +} + +/* -------------------------------------------------------------------------- */ +void DoubleBuffer::resize(int m, int n) { + m_current->resize(m, n); + m_old->resize(m, n); +} diff --git a/poisson/solutions-cartesian/double_buffer.hh b/poisson/solutions-cartesian/double_buffer.hh new file mode 100644 index 0000000..1d21b5c --- /dev/null +++ b/poisson/solutions-cartesian/double_buffer.hh @@ -0,0 +1,28 @@ +#ifndef DOUBLE_BUFFER +#define DOUBLE_BUFFER + +/* -------------------------------------------------------------------------- */ +#include +/* -------------------------------------------------------------------------- */ +#include "grid.hh" +/* -------------------------------------------------------------------------- */ + +class DoubleBuffer { +public: + DoubleBuffer(int m = 0 , int n = 0); + + void resize(int m, int n); + Grid & current(); + Grid & old(); + + const Grid & current() const; + const Grid & old() const; + + void swap(); +private: + + std::unique_ptr m_current; + std::unique_ptr m_old; +}; + 
+#endif /* DOUBLE_BUFFER */ diff --git a/poisson/solutions-cartesian/dumpers.cc b/poisson/solutions-cartesian/dumpers.cc new file mode 100644 index 0000000..c626753 --- /dev/null +++ b/poisson/solutions-cartesian/dumpers.cc @@ -0,0 +1,205 @@ +/* -------------------------------------------------------------------------- */ +#include "dumpers.hh" +#include "grid.hh" +/* -------------------------------------------------------------------------- */ +#include +#include +#include +#include +#include +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +void Dumper::set_min(float min) { m_min = min; } + +void Dumper::set_max(float max) { m_max = max; } + +float Dumper::min() const { return m_min; } + +float Dumper::max() const { return m_max; } + +/* -------------------------------------------------------------------------- */ +void DumperASCII::dump(const Grid & grid, int step) { + std::ofstream fout; + std::stringstream sfilename; + + sfilename << "out_" << std::setfill('0') << std::setw(5) << step << ".pgm"; + fout.open(sfilename.str()); + + int m = grid.m(); + int n = grid.n(); + + fout << "P2" << std::endl << "# CREATOR: Poisson program" << std::endl; + fout << m << " " << n << std::endl; + fout << 255 << std::endl; + + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + int v = 255. * (grid(i, j) - m_min) / (m_max - m_min); + v = std::min(v, 255); + fout << v << std::endl; + } + } +} + +/* -------------------------------------------------------------------------- */ +void DumperBinary::dump(const Grid & grid, int step) { + std::stringstream sfilename; + + std::array coords{0, 0}, dims{0, 0}; + + int prank{0}, psize{1}; + MPI_Comm_rank(m_communicator, &prank); + MPI_Comm_size(m_communicator, &psize); + + std::array periods{0, 0}; + MPI_Cart_get(m_communicator, dims.size(), dims.data(), periods.data(), coords.data()); + + sfilename << "out_" << std::setfill('0') << std::setw(5) << step << ".bmp"; + + int h = grid.m(); + int w = grid.n(); + int global_w = w; + + std::array starts = {0, 0}; + std::array local_sizes = {h, w}; + std::array global_sizes = local_sizes; + + // removing the ghosts + if(prank == 0) std::cout << dims[0] << " " << dims[1] << "\n"; + + for (std::size_t i = 0; i < local_sizes.size(); ++i) { + if (dims[i] > 1) { + local_sizes[i] -= (coords[i] == 0 || coords[i] == dims[i] - 1 ? 1 : 2); + starts[i] += (coords[i] == 0 ? 
0 : 1); + } + } + + std::array comms; + + // determining the local offset + std::array offset = {0, 0}; + + std::vector> line_col_size(2); + + for (size_t i = 0; i < line_col_size.size(); ++i) { + std::array remain_dims = {false, false}; + remain_dims[i] = true; + MPI_Cart_sub(m_communicator, remain_dims.data(), &(comms[i])); + + line_col_size[i].resize(dims[i]); + MPI_Allgather(&local_sizes[i], 1, MPI_INT, line_col_size[i].data(), 1, + MPI_INT, comms[i]); + + global_sizes[i] = 0; + for (int j = 0; j < dims[i]; ++j) { + global_sizes[i] += line_col_size[i][j]; + } + } + + //for (int i = dims[0] - 1; i > coords[0]; --i) { + for (int i = 0; i < coords[0]; ++i) { + offset[0] += line_col_size[0][i]; + } + + for (int i = 0; i < coords[1]; ++i) { + offset[1] += line_col_size[1][i]; + } + + global_w = 4 * ((24 * global_sizes[1] + 31) / 32); + + // if the file width (3*w) is not a multiple of 4 adds enough bytes to make it + // a multiple of 4 + int padding = global_w - 3 * global_sizes[1]; + + w = 3 * local_sizes[1]; + + if (coords[1] == dims[1] - 1) { + w += padding; + } else { + padding = 0; + } + + int filesize = 54 + global_sizes[0] * global_w; + + std::vector img(local_sizes[0] * w); + std::fill(img.begin(), img.end(), 0); + + for (int i = 0; i < local_sizes[0]; i++) { + for (int j = 0; j < local_sizes[1]; j++) { + float v = + ((grid(i + starts[0], j + starts[1]) - m_min) / (m_max - m_min)); + + float r = v * 255; // Red channel + float g = v * 255; // Green channel + float b = prank * 255 / psize; // Red channel + + r = std::min(r, 255.f); + g = std::min(g, 255.f); + b = std::min(b, 255.f); + + img[w * i + 3 * j + 0] = r; + img[w * i + 3 * j + 1] = g; + img[w * i + 3 * j + 2] = b; + } + } + + auto write_bytes = [](auto && array, auto && value) { + array[0] = value; + array[1] = value >> 8; + array[2] = value >> 16; + array[3] = value >> 24; + }; + + std::array bmpfileheader = {'B', 'M', 0, 0, 0, 0, 0, + 0, 0, 0, 54, 0, 0, 0}; + std::array bmpinfoheader = {40, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 24, 0}; + + write_bytes(&bmpfileheader[2], filesize); + + write_bytes(&bmpinfoheader[4], global_sizes[1]); + write_bytes(&bmpinfoheader[8], global_sizes[0]); + write_bytes(&bmpinfoheader[20], filesize - 54); + + MPI_File fh; + MPI_Status status; + MPI_Datatype subarray_t; + +#if defined(_DEBUG_MPI) + for (int i = 0; i < psize; ++i) { + if (prank == i) + std::cerr << prank << " (" << coords[0] << ", " << coords[1] << ")" + << " - " << local_sizes[0] << "x" << local_sizes[1] << "(" << w + << ") + " << offset[0] << "x" << offset[1] << "("<< 3*offset[1]<<") ++ [" + << global_sizes[0] << "x" << global_sizes[1] << "(" << global_w + << ")] " << padding << "\n"; + MPI_Barrier(m_communicator); + } +#endif + + global_sizes[1] = global_w; + local_sizes[1] = w; + offset[1] *= 3; + + MPI_Type_create_subarray(global_sizes.size(), global_sizes.data(), + local_sizes.data(), offset.data(), MPI_ORDER_C, + MPI_CHAR, &subarray_t); + MPI_Type_commit(&subarray_t); + + // opening a file in write and create mode + MPI_File_open(MPI_COMM_WORLD, sfilename.str().c_str(), + MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh); + // defining the size of the file + MPI_File_set_size(fh, filesize); + + // rank 0 writes the header + if (prank == 0) { + MPI_File_write_at(fh, 0, bmpfileheader.data(), 14, MPI_CHAR, &status); + MPI_File_write_at(fh, 14, bmpinfoheader.data(), 40, MPI_CHAR, &status); + } + + MPI_File_set_view(fh, 54, MPI_CHAR, subarray_t, "native", MPI_INFO_NULL); + MPI_File_write(fh, img.data(), img.size(), 
MPI_CHAR, &status); + MPI_File_close(&fh); +} diff --git a/poisson/solutions-cartesian/dumpers.hh b/poisson/solutions-cartesian/dumpers.hh new file mode 100644 index 0000000..dc9b013 --- /dev/null +++ b/poisson/solutions-cartesian/dumpers.hh @@ -0,0 +1,58 @@ +#if defined(_MPI) +#include +#endif + +#ifndef DUMPERS_HH +#define DUMPERS_HH + +/* -------------------------------------------------------------------------- */ +class Grid; + +/* -------------------------------------------------------------------------- */ +class Dumper { +public: +#if defined(_MPI) + explicit Dumper(MPI_Comm communicator) + : m_min(-1.), m_max(1.), m_communicator(communicator) {} +#else + Dumper() : m_min(-1.), m_max(1.) {} +#endif + virtual void dump(const Grid & m_grid, int step) = 0; + + void set_min(float min); + void set_max(float max); + + float min() const; + float max() const; + +protected: + float m_min, m_max; +#if defined(_MPI) + MPI_Comm m_communicator; +#endif +}; + +/* -------------------------------------------------------------------------- */ +class DumperASCII : public Dumper { +public: +#if defined(_MPI) + explicit DumperASCII(MPI_Comm communicator) : Dumper(communicator) {} +#else + DumperASCII() : Dumper() {} +#endif + + virtual void dump(const Grid & grid, int step); +}; + +/* -------------------------------------------------------------------------- */ +class DumperBinary : public Dumper { +public: +#if defined(_MPI) + explicit DumperBinary(MPI_Comm communicator) : Dumper(communicator) {} +#else + DumperBinary() : Dumper() {} +#endif + virtual void dump(const Grid & grid, int step); +}; + +#endif /* DUMPERS_HH */ diff --git a/poisson/solutions-cartesian/grid.cc b/poisson/solutions-cartesian/grid.cc new file mode 100644 index 0000000..f507a41 --- /dev/null +++ b/poisson/solutions-cartesian/grid.cc @@ -0,0 +1,116 @@ +/* -------------------------------------------------------------------------- */ +#include "grid.hh" +/* -------------------------------------------------------------------------- */ +#include +#include +#if defined(_DEBUG_MPI) +#include +#endif +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +Grid::Grid(int m, int n) : m_m(m), m_n(n), m_storage(m * n) { clear(); } + +/* -------------------------------------------------------------------------- */ +Grid::~Grid() { + MPI_Type_free(&column_t); + MPI_Type_free(&line_t); +} + +/* -------------------------------------------------------------------------- */ +void Grid::clear() { std::fill(m_storage.begin(), m_storage.end(), 0.); } + +/* -------------------------------------------------------------------------- */ +void Grid::resize(int m, int n) { + m_m = m; + m_n = n; + m_storage.resize(m * n); +} + +/* -------------------------------------------------------------------------- */ +int Grid::m() const { return m_m; } +int Grid::n() const { return m_n; } + +/* -------------------------------------------------------------------------- */ +void Grid::initializeHaloCommunications(MPI_Comm & m_communicator, + const std::array & ghosts) { + std::array coords; + int prank; + + MPI_Type_vector(m_m - ghosts[0], 1, m_n, MPI_FLOAT, &column_t); + MPI_Type_vector(1, m_n - ghosts[1], 1, MPI_FLOAT, &line_t); + + MPI_Type_commit(&column_t); + MPI_Type_commit(&line_t); + + MPI_Comm_rank(m_communicator, &prank); + MPI_Cart_coords(m_communicator, prank, coords.size(), coords.data()); + + // determining the rank of the neighbors + int left, 
right, top, bottom; + MPI_Cart_shift(m_communicator, 1, 1, &left, &right); + MPI_Cart_shift(m_communicator, 0, 1, &top, &bottom); + +#if defined(_DEBUG_MPI) + static bool first = true; + if (first) { + int psize; + MPI_Comm_size(m_communicator, &psize); + for (int i = 0; i < psize; ++i) { + if (prank == i) { + std::cerr << prank << std::setw(7) << top << "\n" + << prank << std::setw(8) << "^\n" + << prank << std::setw(3) << left << " < . > " << right << "\n" + << prank << std::setw(8) << "v\n" + << prank << std::setw(7) << bottom << "\n\n"; + } + MPI_Barrier(m_communicator); + } + first = false; + } +#endif + + auto addr = [&](auto && i, auto && j) { return &(this->operator()(i, j)); }; + + MPI_Request * rrequest = m_requests.data(); + MPI_Request * srequest = m_requests.data() + 4; + + static int tag = 1; + /// preparing receives + { + auto send = addr(m_m - 2, 1); + auto recv = addr(0, 1); + MPI_Send_init(send, 1, line_t, bottom, tag, m_communicator, srequest); + MPI_Recv_init(recv, 1, line_t, top, tag, m_communicator, rrequest); + ++rrequest; + ++srequest; + ++tag; + } + { + auto send = addr(1, m_n - 2); + auto recv = addr(1, 0); + MPI_Send_init(send, 1, column_t, right, tag, m_communicator, srequest); + MPI_Recv_init(recv, 1, column_t, left, tag, m_communicator, rrequest); + ++rrequest; + ++srequest; + ++tag; + } + { + auto send = addr(1, 1); + auto recv = addr(m_m - 1, 1); + MPI_Send_init(send, 1, line_t, top, tag, m_communicator, srequest); + MPI_Recv_init(recv, 1, line_t, bottom, tag, m_communicator, rrequest); + ++rrequest; + ++srequest; + ++tag; + } + { + auto send = addr(1, 1); + auto recv = addr(1, m_n - 1); + MPI_Send_init(send, 1, column_t, left, tag, m_communicator, srequest); + MPI_Recv_init(recv, 1, column_t, right, tag, m_communicator, rrequest); + ++rrequest; + ++srequest; + ++tag; + } +} diff --git a/poisson/solutions-cartesian/grid.hh b/poisson/solutions-cartesian/grid.hh new file mode 100644 index 0000000..96e6e48 --- /dev/null +++ b/poisson/solutions-cartesian/grid.hh @@ -0,0 +1,73 @@ +#ifndef GRID_HH +#define GRID_HH + +/* -------------------------------------------------------------------------- */ +#include +#include +#include +#include +/* -------------------------------------------------------------------------- */ +namespace { +struct DEBUG { +#if defined(DEBUG) + static void print(const std::string & func) { + std::cerr << func << "\n"; + } +#else + static void print(const std::string & ) { + + } +#endif +}; +} // namespace + +/* -------------------------------------------------------------------------- */ + +class Grid { +public: + Grid(int m = 0, int n = 0); + ~Grid(); + + /// access the value [i][j] of the grid + inline float & operator()(int i, int j) { return m_storage[i * m_n + j]; } + inline const float & operator()(int i, int j) const { + return m_storage[i * m_n + j]; + } + + void resize(int m, int n); + + /// set the grid to 0 + void clear(); + + int m() const; + int n() const; + + void initializeHaloCommunications(MPI_Comm & m_communicator, + const std::array & ghosts); + void startSynchronization() { + MPI_Request * requests = m_requests.data(); + MPI_Startall(8, requests); + } + + void waitReceives() { + auto requests = m_requests.data(); + MPI_Waitall(4, requests, MPI_STATUS_IGNORE); + } + + void waitSends() { + auto requests = m_requests.data(); + MPI_Waitall(4, requests + 4, MPI_STATUS_IGNORE); + } + +private: + int m_m, m_n; + std::vector m_storage; + + /// request for asynchronous communications + std::array m_requests; + + /// communication 
types + MPI_Datatype column_t, line_t; +}; + +#endif /* GRID_HH */ diff --git a/poisson/solutions-cartesian/optim.sub b/poisson/solutions-cartesian/optim.sub new file mode 100644 index 0000000..3e4440e --- /dev/null +++ b/poisson/solutions-cartesian/optim.sub @@ -0,0 +1,35 @@ +#!/bin/bash + +##SBATCH --reservation=phpc2017 +##SBATCH --account=phpc2017 +#SBATCH -N 1 +#SBATCH -n 1 + +OPTIMS="-O0 -O1 -O2 -O3" +N=512 + + +module load gcc + +echo gcc > gcc.out +for _opt in ${OPTIMS} '-O3 -ftree-vectorize' +do + make clean + make CXXFLAGS="-std=c++11 ${_opt}" CXX=g++ + ./poisson $N| cut -f4 -d " " >> gcc.out +done + + +module load intel + +echo icpc > intel.out +for _opt in ${OPTIMS} '-O3 -xHOST' +do + make clean + make CXXFLAGS="-std=c++11 ${_opt}" CXX=icpc + ./poisson $N | cut -f4 -d " " >> intel.out +done + +echo " ${OPTIMS} specific" | tr " " "\n" > header + +paste header gcc.out intel.out > compilers.out diff --git a/poisson/solutions-cartesian/poisson.cc b/poisson/solutions-cartesian/poisson.cc new file mode 100644 index 0000000..95f843b --- /dev/null +++ b/poisson/solutions-cartesian/poisson.cc @@ -0,0 +1,73 @@ +/* -------------------------------------------------------------------------- */ +#include "simulation.hh" +/* -------------------------------------------------------------------------- */ +#include +#include +#include +#include +/* -------------------------------------------------------------------------- */ +#include +/* -------------------------------------------------------------------------- */ + +#define EPSILON 0.05 + +typedef std::chrono::high_resolution_clock clk; +typedef std::chrono::duration second; + +static void usage(const std::string & prog_name) { + std::cerr << prog_name << " " << std::endl; + exit(0); +} + +int main(int argc, char * argv[]) { + int prank{0}, psize{1}, nthreads{1}; + MPI_Init(NULL, NULL); + // assert(provided == MPI_THREAD_MULTIPLE); + + MPI_Comm_rank(MPI_COMM_WORLD, &prank); + MPI_Comm_size(MPI_COMM_WORLD, &psize); + + if (argc != 2) + usage(argv[0]); + + std::stringstream args(argv[1]); + int N; + args >> N; + + if (args.fail()) + usage(argv[0]); + + std::unique_ptr simu; + + simu = std::make_unique(N, N, MPI_COMM_WORLD); + + simu->set_initial_conditions(); + simu->set_epsilon(EPSILON); + + float l2; + int k; + + auto start = clk::now(); + std::tie(l2, k) = simu->compute(); + auto end = clk::now(); + + second time = end - start; + + auto min = simu->min(); + auto max = simu->max(); + + if (prank == 0) + std::cout << "MPI: " << psize << " (" << simu->dims(0) << ", " << simu->dims(1) << ")\n" + << "OMP: " << nthreads << "\n" + << "Grid: " << N << "x" << N << "\n" + << "Bound: " << "[ " << min << " : " << max << " ]" << "\n" + << "Iter: " << k << "\n" + << "Norm: " << std::scientific << l2 << "\n" + << "Time: " << time.count() << "s" << "\n" + << "Flops: " << simu->complexity()*k/time.count()/1e9 << " GFlops" << std::endl; + + simu.release(); // To force destruction of MPI related stuff + MPI_Finalize(); + + return 0; +} diff --git a/poisson/solutions-cartesian/poisson.sub b/poisson/solutions-cartesian/poisson.sub new file mode 100644 index 0000000..aa43198 --- /dev/null +++ b/poisson/solutions-cartesian/poisson.sub @@ -0,0 +1,31 @@ +#!/bin/bash + +##SBATCH --reservation=phpc2017 +##SBATCH --account=phpc2017 +#SBATCH -N 1 +#SBATCH -n 1 + +module load gcc + +rm -f complexity.dat + +for i in 128 256 512 1024 +do + ./poisson $i >> complexity.dat +done + +slmodules -r deprecated +module load gnuplot + +gnuplot < +#include +#include +#include +#include 
+ +/* -------------------------------------------------------------------------- */ +Simulation::Simulation(int m, int n, MPI_Comm communicator) { + + // retrieving the number of proc and the rank in the proc pool + MPI_Comm_size(communicator, &m_psize); + + m_dims = {0, 0}; + std::array periods{0, 0}; + MPI_Dims_create(m_psize, m_dims.size(), m_dims.data()); + + assert(m_dims[0] * m_dims[1] == m_psize && "MPI_Dims_create is fucked up"); + + MPI_Cart_create(communicator, m_dims.size(), m_dims.data(), periods.data(), 0, + &m_communicator); + + MPI_Comm_rank(m_communicator, &m_prank); + + MPI_Cart_coords(m_communicator, m_prank, m_coords.size(), m_coords.data()); + + m_dumper = std::make_unique(m_communicator); + + m_global_size = {m, n}; + + std::array ghosts{0, 0}; + + for (std::size_t i = 0; i < m_local_size.size(); ++i) { + // computation of the local size of the grid the remainder is spread equally + // on the first processors + m_local_size[i] = m_global_size[i] / m_dims[i] + + (m_coords[i] < m_global_size[i] % m_dims[i] ? 1 : 0); + + // adding the ghosts lines if needed + if (m_dims[i] > 1) + ghosts[i] = (m_coords[i] == 0 || m_coords[i] == m_dims[i] - 1) ? 1 : 2; + + m_local_size[i] += ghosts[i]; + + // computing the offsets of the local grid in the global one + m_offset[i] = (m_global_size[i] / m_dims[i]) * m_coords[i] + + (m_coords[i] < m_global_size[i] % m_dims[i] + ? m_coords[i] + : m_global_size[i] % m_dims[i]); + + m_h[i] = 1. / m_global_size[i]; + } + +#if defined(_DEBUG_MPI) + for (int i = 0; i < m_psize; ++i) { + if (m_prank == i) + std::cerr << m_prank << " (" << m_coords[0] << ", " << m_coords[1] << ")" + << " - " << m_local_size[0] << "x" << m_local_size[1] << " + " + << m_offset[0] << "x" << m_offset[1] << " ++ [" << ghosts[0] + << "x" << ghosts[1] << "]\n"; + MPI_Barrier(m_communicator); + } +#endif + + // resizing the different grids + m_grids.resize(m_local_size[0], m_local_size[1]); + m_f.resize(m_local_size[0], m_local_size[1]); + + m_grids.current().initializeHaloCommunications(m_communicator, ghosts); + m_grids.old().initializeHaloCommunications(m_communicator, ghosts); +} + +/* -------------------------------------------------------------------------- */ +void Simulation::set_initial_conditions() { + std::array start = {0, 0}; + std::array end = m_local_size; + + for (int i = 0; i < 2; ++i) { + start[i] += (m_coords[i] == 0 ? 0 : 1); + end[i] -= (m_coords[i] == m_dims[i] - 1 ? 0 : 1); + } + + float min = 0., max = 0.; + float a = 5.5 * M_PI; + for (int i = start[0]; i < end[0]; i++) { + for (int j = start[1]; j < end[1]; j++) { + m_f(i, j) = + -2. 
* a * a * std::sin(a * (m_offset[0] + i - start[0]) * m_h[0]) * + std::sin(a * (m_offset[1] + j - start[1]) * m_h[1]) * m_h[0] * m_h[1]; + + min = std::min(min, m_f(i, j)); + max = std::max(max, m_f(i, j)); + } + } + + m_dumper->set_min(min); + m_dumper->set_max(max); + m_dumper->dump(m_f, 1); +} + +/* -------------------------------------------------------------------------- */ +std::tuple Simulation::compute() { + int s = 0; + + // std::cout << m_h_m << " " << m_h_n << " " << m_epsilon << std::endl; + float l2 = 0.; + + do { + l2 = compute_step(); + DEBUG::print(std::to_string(m_prank) + "do_step: " + std::to_string(l2)); + + m_grids.swap(); + + MPI_Allreduce(MPI_IN_PLACE, &l2, 1, MPI_FLOAT, MPI_SUM, m_communicator); + DEBUG::print(std::to_string(m_prank) + " MPI_Allreduce: " + std::to_string(l2) + " / " + std::to_string(m_epsilon)); + ++s; + + } while (l2 > m_epsilon); + + m_dumper->set_min(min()); + m_dumper->set_max(max()); + m_dumper->dump(m_grids.old(), 0); + + return std::make_tuple(std::sqrt(l2), s); +} + +/* -------------------------------------------------------------------------- */ +void Simulation::set_epsilon(float epsilon) { m_epsilon = epsilon * epsilon; } + +/* -------------------------------------------------------------------------- */ +float Simulation::epsilon() const { return std::sqrt(m_epsilon); } + +/* -------------------------------------------------------------------------- */ +inline float Simulation::compute_row(int i) { + float l2 = 0.; + + Grid & u = m_grids.current(); + Grid & uo = m_grids.old(); + + for (int j = 1; j < m_local_size[1] - 1; j++) { + // computation of the new step + u(i, j) = (uo(i - 1, j) + uo(i + 1, j) + uo(i, j - 1) + uo(i, j + 1) - + m_f(i, j)) / + 4.; + + // L2 norm + l2 += (uo(i, j) - u(i, j)) * (uo(i, j) - u(i, j)); + } + + return l2; +} + +/* -------------------------------------------------------------------------- */ +float Simulation::compute_step() { + float l2 = 0.; + + auto & uo = m_grids.old(); + auto & u = m_grids.current(); + + auto _compute = [&](auto && i, auto && j) { + u(i, j) = (uo(i - 1, j) + uo(i + 1, j) + uo(i, j - 1) + uo(i, j + 1) - + m_f(i, j)) / + 4.; + + l2 += (uo(i, j) - u(i, j)) * (uo(i, j) - u(i, j)); + }; + + // start the permanent communications + uo.startSynchronization(); + + // computing the inner rows that do not depend on the ghosts + for (int i = 2; i < m_local_size[0] - 2; i++) { + for (int j = 2; j < m_local_size[1] - 2; j++) { + _compute(i, j); + } + } + + /// wait to receive the ghosts before using them for them computation + uo.waitReceives(); + + for (int i = 2; i < m_local_size[1] - 2; i++) { + _compute(1, i); + } + + for (int i = 2; i < m_local_size[1] - 2; i++) { + _compute(m_local_size[0] - 2, i); + } + + for (int i = 2; i < m_local_size[0] - 2; i++) { + _compute(i, 1); + } + + for (int i = 2; i < m_local_size[0] - 2; i++) { + _compute(i, m_local_size[1] - 2); + } + + /// wait to send everything before changing the buffers + uo.waitSends(); + + return l2; +} + +namespace { +template +auto accumulate_grid(const G & grid, float init, Func && func) { + auto m = grid.m(); + auto n = grid.n(); + + for (decltype(m) i = 1; i < m - 1; i++) { + for (decltype(n) j = 1; j < n - 1; j++) { + init = func(init, grid(i, j)); + } + } + return init; +} +} // namespace + +/* -------------------------------------------------------------------------- */ +float Simulation::min() const { + const auto & u0 = m_grids.old(); + + auto tmp = accumulate_grid( + u0, u0(1, 1), [](auto && a, auto && b) { return std::min(a, 
b); }); + + MPI_Allreduce(MPI_IN_PLACE, &tmp, 1, MPI_FLOAT, MPI_MIN, m_communicator); + + return tmp; +} + +/* -------------------------------------------------------------------------- */ +float Simulation::max() const { + const auto & u0 = m_grids.old(); + + auto tmp = accumulate_grid( + u0, u0(1, 1), [](auto && a, auto && b) { return std::max(a, b); }); + + MPI_Allreduce(MPI_IN_PLACE, &tmp, 1, MPI_FLOAT, MPI_MAX, m_communicator); + + return tmp; +} + +/* -------------------------------------------------------------------------- */ diff --git a/poisson/solutions-cartesian/simulation.hh b/poisson/solutions-cartesian/simulation.hh new file mode 100644 index 0000000..5ff0a50 --- /dev/null +++ b/poisson/solutions-cartesian/simulation.hh @@ -0,0 +1,85 @@ +#ifndef SIMULATION_HH +#define SIMULATION_HH + +/* -------------------------------------------------------------------------- */ +#include "double_buffer.hh" +#include "dumpers.hh" +#include "grid.hh" +/* -------------------------------------------------------------------------- */ +#include +#include +#include +#include +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +class Simulation { +public: + Simulation(int m, int n, MPI_Comm communicator); + + /// set the initial conditions, Dirichlet and source term + virtual void set_initial_conditions(); + + /// perform the simulation + std::tuple compute(); + + /// access the precision + void set_epsilon(float epsilon); + float epsilon() const; + + std::size_t complexity() { return 8 * m_global_size[0] * m_global_size[1]; } + + float min() const; + float max() const; + + float dims(std::size_t i) const { return m_dims[i]; } + +protected: + /// compute one step and return an error + virtual float compute_step(); + + /// compute one row + float compute_row(int i); + +private: + /// Global problem size + std::array m_global_size; + + /// Local problem size + std::array m_local_size; + + /// local offset + std::array m_offset; + + /// Cartesian dimensions + std::array m_dims; + + /// Cartesian coordinates + std::array m_coords; + + /// proc rank + int m_prank{0}; + + /// communicator size + int m_psize{1}; + + /// Precision to achieve + float m_epsilon; + + /// grid spacing + std::array m_h; + + /// Grids storage + DoubleBuffer m_grids; + + /// source term + Grid m_f; + + /// Dumper to use for outputs + std::unique_ptr m_dumper; + + /// communicator + MPI_Comm m_communicator; +}; + +#endif /* SIMULATION_HH */ diff --git a/poisson/solutions-persistent/Makefile b/poisson/solutions-persistent/Makefile new file mode 100644 index 0000000..77b7a64 --- /dev/null +++ b/poisson/solutions-persistent/Makefile @@ -0,0 +1,14 @@ +CXX=mpic++ +LD=${CXX} +CXXFLAGS+=-Wall -Wextra -Werror -pedantic -std=c++11 -O2 -g3 +LDFLAGS+=-lm $(CXXFLAGS) + +OBJS=poisson.o grid.o double_buffer.o simulation.o dumpers.o + +all: poisson + +poisson: $(OBJS) + $(LD) -o $@ $(OBJS) $(LDFLAGS) + +clean: + rm -f hello poisson *.o *~ diff --git a/poisson/solutions-persistent/compile_commands.json b/poisson/solutions-persistent/compile_commands.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/poisson/solutions-persistent/compile_commands.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/poisson/solutions-persistent/double_buffer.cc b/poisson/solutions-persistent/double_buffer.cc new file mode 100644 index 0000000..67749d0 --- /dev/null +++ b/poisson/solutions-persistent/double_buffer.cc @@ -0,0 
+1,25 @@ +/* -------------------------------------------------------------------------- */ +#include "double_buffer.hh" +#include "grid.hh" +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +DoubleBuffer::DoubleBuffer(int m, int n) + : m_current(new Grid(m, n)), m_old(new Grid(m, n)) {} + +/* -------------------------------------------------------------------------- */ +Grid & DoubleBuffer::current() { return *m_current; } + +/* -------------------------------------------------------------------------- */ +Grid & DoubleBuffer::old() { return *m_old; } + +/* -------------------------------------------------------------------------- */ +void DoubleBuffer::swap() { + m_current.swap(m_old); +} + +/* -------------------------------------------------------------------------- */ +void DoubleBuffer::resize(int m, int n) { + m_current->resize(m, n); + m_old->resize(m, n); +} diff --git a/poisson/solutions-persistent/double_buffer.hh b/poisson/solutions-persistent/double_buffer.hh new file mode 100644 index 0000000..a05410b --- /dev/null +++ b/poisson/solutions-persistent/double_buffer.hh @@ -0,0 +1,25 @@ +#ifndef DOUBLE_BUFFER +#define DOUBLE_BUFFER + +/* -------------------------------------------------------------------------- */ +#include +/* -------------------------------------------------------------------------- */ +#include "grid.hh" +/* -------------------------------------------------------------------------- */ + +class DoubleBuffer { +public: + DoubleBuffer(int m = 0 , int n = 0); + + void resize(int m, int n); + Grid & current(); + Grid & old(); + + void swap(); +private: + + std::unique_ptr m_current; + std::unique_ptr m_old; +}; + +#endif /* DOUBLE_BUFFER */ diff --git a/poisson/solutions-persistent/dumpers.cc b/poisson/solutions-persistent/dumpers.cc new file mode 100644 index 0000000..3dc9abb --- /dev/null +++ b/poisson/solutions-persistent/dumpers.cc @@ -0,0 +1,160 @@ +/* -------------------------------------------------------------------------- */ +#include "dumpers.hh" +#include "grid.hh" +/* -------------------------------------------------------------------------- */ +#include +#include +#include +#include +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +void Dumper::set_min(float min) { m_min = min; } + +void Dumper::set_max(float max) { m_max = max; } + +float Dumper::min() const { return m_min; } + +float Dumper::max() const { return m_max; } + +/* -------------------------------------------------------------------------- */ +void DumperASCII::dump(int step) { + std::ofstream fout; + std::stringstream sfilename; + + sfilename << "out_" << std::setfill('0') << std::setw(5) << step << ".pgm"; + fout.open(sfilename.str()); + + int m = m_grid.m(); + int n = m_grid.n(); + + fout << "P2" << std::endl << "# CREATOR: Poisson program" << std::endl; + fout << m << " " << n << std::endl; + fout << 255 << std::endl; + + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + int v = 255. 
* (m_grid(i, j) - m_min) / (m_max - m_min); + v = std::min(v, 255); + fout << v << std::endl; + } + } +} + +/* -------------------------------------------------------------------------- */ +void DumperBinary::dump(int step) { + std::ofstream fout; + std::stringstream sfilename; + + int prank, psize; + MPI_Comm_rank(m_communicator, &prank); + MPI_Comm_size(m_communicator, &psize); + + sfilename << "out_" << std::setfill('0') << std::setw(5) << step << ".bmp"; + fout.open(sfilename.str(), std::ios_base::binary); + + int h = m_grid.m(); + int w = m_grid.n(); + + // removing the ghosts from the height + if (psize > 1) + h = (prank == 0 || prank == psize - 1 ? h - 1 : h - 2); + int start = (prank == 0 ? 0 : 1); + + // Gathering the size of every processors, this could be done as in the + // constructor of the Simulation instead + std::vector size_per_proc(psize); + MPI_Allgather(&h, 1, MPI_INT, size_per_proc.data(), 1, MPI_INT, + m_communicator); + + // determining the local offset + int offset_h = 0; + for (int i = 0; i < prank; ++i) { + offset_h += size_per_proc[i]; + } + + int total_h = offset_h; + for (int i = prank; i < psize; ++i) { + total_h += size_per_proc[i]; + } + + // gathering all the data on the root processor + std::vector buffer; + if (prank == 0) { + std::vector displs(psize + 1); + displs[0] = 0; + for (int i = 0; i < psize; ++i) { + size_per_proc[i] *= w; + displs[i + 1] = displs[i] + size_per_proc[i]; + } + + buffer.resize(displs[psize]); + + MPI_Gatherv(&m_grid(start, 0), h * w, MPI_FLOAT, + buffer.data(), size_per_proc.data(), displs.data(), MPI_FLOAT, 0, m_communicator); + } else { + MPI_Gatherv(&m_grid(start, 0), h * w, MPI_FLOAT, + NULL, NULL, NULL, MPI_FLOAT, 0, m_communicator); + + return; + } + + // only processor 0 continues from here + + int row_size = 3 * w; + // if the file width (3*w) is not a multiple of 4 adds enough bytes to make it + // a multiple of 4 + int padding = (4 - (row_size) % 4) % 4; + row_size += padding; + + int filesize = 54 + (row_size)*total_h; + + std::vector img(row_size * total_h); + std::fill(img.begin(), img.end(), 0); + + for (int i = 0; i < total_h; i++) { + for (int j = 0; j < w; j++) { + float v = ((buffer[(total_h - 1 - i)*w + j] - m_min) / (m_max - m_min)); + + float r = v * 255; // Red channel + float g = v * 255; // Green channel + float b = v * 255; // Red channel + + r = std::min(r, 255.f); + g = std::min(g, 255.f); + b = std::min(b, 255.f); + + img[row_size * i + 3 * j + 2] = r; + img[row_size * i + 3 * j + 1] = g; + img[row_size * i + 3 * j + 0] = b; + } + } + + std::array bmpfileheader = {'B', 'M', 0, 0, 0, 0, 0, + 0, 0, 0, 54, 0, 0, 0}; + std::array bmpinfoheader = {40, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 24, 0}; + + bmpfileheader[2] = filesize; + bmpfileheader[3] = filesize >> 8; + bmpfileheader[4] = filesize >> 16; + bmpfileheader[5] = filesize >> 24; + + bmpinfoheader[4] = w; + bmpinfoheader[5] = w >> 8; + bmpinfoheader[6] = w >> 16; + bmpinfoheader[7] = w >> 24; + bmpinfoheader[8] = total_h; + bmpinfoheader[9] = total_h >> 8; + bmpinfoheader[10] = total_h >> 16; + bmpinfoheader[11] = total_h >> 24; + bmpinfoheader[20] = (filesize - 54); + bmpinfoheader[21] = (filesize - 54) >> 8; + bmpinfoheader[22] = (filesize - 54) >> 16; + bmpinfoheader[23] = (filesize - 54) >> 24; + + fout.write(bmpfileheader.data(), 14); + fout.write(bmpinfoheader.data(), 40); + + fout.write(img.data(), total_h * row_size); +} diff --git a/poisson/solutions-persistent/dumpers.hh b/poisson/solutions-persistent/dumpers.hh new file mode 100644 
index 0000000..27a5f12 --- /dev/null +++ b/poisson/solutions-persistent/dumpers.hh @@ -0,0 +1,48 @@ +#include + +#ifndef DUMPERS_HH +#define DUMPERS_HH + +/* -------------------------------------------------------------------------- */ +class Grid; + +/* -------------------------------------------------------------------------- */ +class Dumper { +public: + explicit Dumper(const Grid & grid, MPI_Comm communicator) + : m_grid(grid), m_min(-1.), m_max(1.), m_communicator(communicator) {} + + virtual void dump(int step) = 0; + + void set_min(float min); + void set_max(float max); + + float min() const; + float max() const; + +protected: + const Grid & m_grid; + float m_min, m_max; + + MPI_Comm m_communicator; +}; + +/* -------------------------------------------------------------------------- */ +class DumperASCII : public Dumper { +public: + explicit DumperASCII(const Grid & grid, MPI_Comm communicator) + : Dumper(grid, communicator) {} + + virtual void dump(int step); +}; + +/* -------------------------------------------------------------------------- */ +class DumperBinary : public Dumper { +public: + explicit DumperBinary(const Grid & grid, MPI_Comm communicator) + : Dumper(grid, communicator) {} + + virtual void dump(int step); +}; + +#endif /* DUMPERS_HH */ diff --git a/poisson/solutions-persistent/grid.cc b/poisson/solutions-persistent/grid.cc new file mode 100644 index 0000000..bd635e9 --- /dev/null +++ b/poisson/solutions-persistent/grid.cc @@ -0,0 +1,24 @@ +/* -------------------------------------------------------------------------- */ +#include "grid.hh" +/* -------------------------------------------------------------------------- */ +#include +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +Grid::Grid(int m, int n) : m_m(m), m_n(n), m_storage(m * n) { clear(); } + +/* -------------------------------------------------------------------------- */ +void Grid::clear() { std::fill(m_storage.begin(), m_storage.end(), 0.); } + +/* -------------------------------------------------------------------------- */ +void Grid::resize(int m, int n) { + m_m = m; + m_n = n; + m_storage.resize(m*n); +} + +/* -------------------------------------------------------------------------- */ +int Grid::m() const { return m_m; } +int Grid::n() const { return m_n; } + +/* -------------------------------------------------------------------------- */ diff --git a/poisson/solutions-persistent/grid.hh b/poisson/solutions-persistent/grid.hh new file mode 100644 index 0000000..19c1698 --- /dev/null +++ b/poisson/solutions-persistent/grid.hh @@ -0,0 +1,42 @@ +#ifndef GRID_HH +#define GRID_HH + +/* -------------------------------------------------------------------------- */ +#include +#include +/* -------------------------------------------------------------------------- */ + +class Grid { +public: + Grid(int m = 0, int n = 0); + + ~Grid() { + for (auto & req : requests) { + MPI_Request_free(&req); + } + } + + /// access the value [i][j] of the grid + inline float & operator()(int i, int j) { return m_storage[i * m_n + j]; } + inline const float & operator()(int i, int j) const { + return m_storage[i * m_n + j]; + } + + void resize(int m, int n); + + std::vector & getRequests() { return requests; } + + /// set the grid to 0 + void clear(); + + int m() const; + int n() const; + +private: + int m_m, m_n; + std::vector m_storage; + + std::vector requests; +}; + +#endif /* GRID_HH */ diff --git 
a/poisson/solutions-persistent/optim.sub b/poisson/solutions-persistent/optim.sub new file mode 100644 index 0000000..3e4440e --- /dev/null +++ b/poisson/solutions-persistent/optim.sub @@ -0,0 +1,35 @@ +#!/bin/bash + +##SBATCH --reservation=phpc2017 +##SBATCH --account=phpc2017 +#SBATCH -N 1 +#SBATCH -n 1 + +OPTIMS="-O0 -O1 -O2 -O3" +N=512 + + +module load gcc + +echo gcc > gcc.out +for _opt in ${OPTIMS} '-O3 -ftree-vectorize' +do + make clean + make CXXFLAGS="-std=c++11 ${_opt}" CXX=g++ + ./poisson $N| cut -f4 -d " " >> gcc.out +done + + +module load intel + +echo icpc > intel.out +for _opt in ${OPTIMS} '-O3 -xHOST' +do + make clean + make CXXFLAGS="-std=c++11 ${_opt}" CXX=icpc + ./poisson $N | cut -f4 -d " " >> intel.out +done + +echo " ${OPTIMS} specific" | tr " " "\n" > header + +paste header gcc.out intel.out > compilers.out diff --git a/poisson/solutions-persistent/poisson.cc b/poisson/solutions-persistent/poisson.cc new file mode 100644 index 0000000..d9fbd8d --- /dev/null +++ b/poisson/solutions-persistent/poisson.cc @@ -0,0 +1,66 @@ +/* -------------------------------------------------------------------------- */ +#include "simulation.hh" +/* -------------------------------------------------------------------------- */ +#include +#include +#include +#include +#include +#include +/* -------------------------------------------------------------------------- */ +#include +#include +/* -------------------------------------------------------------------------- */ + +#define EPSILON 0.005 + +typedef std::chrono::high_resolution_clock clk; +typedef std::chrono::duration second; + +static void usage(const std::string & prog_name) { + std::cerr << prog_name << " " << std::endl; + exit(0); +} + + +int main(int argc, char * argv[]) { + MPI_Init(&argc, &argv); + int prank, psize; + + MPI_Comm_rank(MPI_COMM_WORLD, &prank); + MPI_Comm_size(MPI_COMM_WORLD, &psize); + + if (argc != 2) usage(argv[0]); + + std::stringstream args(argv[1]); + int N; + args >> N; + + if(args.fail()) usage(argv[0]); + + auto * simu = new Simulation(N, N, MPI_COMM_WORLD); + + simu->set_initial_conditions(); + + simu->set_epsilon(EPSILON); + + float l2; + int k; + + auto start = clk::now(); + std::tie(l2, k) = simu->compute(); + auto end = clk::now(); + + second time = end - start; + + if(prank == 0) + std::cout << psize << " " << N << " " + << k << " " << std::scientific << l2 << " " + << time.count() << std::endl; + + delete simu; + + MPI_Finalize(); + + return 0; +} diff --git a/poisson/solutions-persistent/poisson.sub b/poisson/solutions-persistent/poisson.sub new file mode 100644 index 0000000..aa43198 --- /dev/null +++ b/poisson/solutions-persistent/poisson.sub @@ -0,0 +1,31 @@ +#!/bin/bash + +##SBATCH --reservation=phpc2017 +##SBATCH --account=phpc2017 +#SBATCH -N 1 +#SBATCH -n 1 + +module load gcc + +rm -f complexity.dat + +for i in 128 256 512 1024 +do + ./poisson $i >> complexity.dat +done + +slmodules -r deprecated +module load gnuplot + +gnuplot < +#include +/* -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- */ +Simulation::Simulation(int m, int n, MPI_Comm communicator) + : m_global_m(m), m_global_n(n), m_epsilon(1e-7), m_h_m(1. / m), + m_h_n(1. 
diff --git a/poisson/solutions-persistent/simulation.cc b/poisson/solutions-persistent/simulation.cc
new file mode 100644
--- /dev/null
+++ b/poisson/solutions-persistent/simulation.cc
+/* -------------------------------------------------------------------------- */
+#include "simulation.hh"
+/* -------------------------------------------------------------------------- */
+#include <cmath>
+#include <iostream>
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+Simulation::Simulation(int m, int n, MPI_Comm communicator)
+    : m_global_m(m), m_global_n(n), m_epsilon(1e-7), m_h_m(1. / m),
+      m_h_n(1. / n), m_dumper(new DumperBinary(m_grids.old(), communicator)),
+      m_communicator(communicator) {
+
+  // retrieving the number of proc and the rank in the proc pool
+  MPI_Comm_rank(m_communicator, &m_prank);
+  MPI_Comm_size(m_communicator, &m_psize);
+
+  // computation of the local size of the grid; the remainder is spread equally
+  // on the first processors
+  m_local_m = m / m_psize + (m_prank < m % m_psize ? 1 : 0);
+  m_local_n = n;
+
+  // adding the ghost lines if needed
+  if (m_psize > 1)
+    m_local_m += (m_prank == 0 || m_prank == m_psize - 1) ? 1 : 2;
+
+  // computing the offsets of the local grid in the global one
+  m_offset_m =
+      (m / m_psize) * m_prank + (m_prank < m % m_psize ? m_prank : m % m_psize);
+  m_offset_n = 0;
+
+  // resizing the different grids
+  m_grids.resize(m_local_m, m_local_n);
+  m_f.resize(m_local_m, m_local_n);
+
+  // determining the ranks of the neighbors
+  m_north_prank = (m_prank == 0 ? MPI_PROC_NULL : m_prank - 1);
+  m_south_prank = (m_prank == (m_psize - 1) ? MPI_PROC_NULL : m_prank + 1);
+
+  auto create_persistent_comm = [&](Grid & g) {
+    auto & requests = g.getRequests();
+    requests.resize(4);
+
+    // creating the persistent receive requests for the two ghost rows
+    MPI_Recv_init(&g(0, 0), m_local_n, MPI_FLOAT, m_north_prank, 0,
+                  m_communicator, &requests[0]);
+    MPI_Recv_init(&g(m_local_m - 1, 0), m_local_n, MPI_FLOAT, m_south_prank, 0,
+                  m_communicator, &requests[1]);
+
+    // creating the persistent send requests for the two boundary rows
+    MPI_Send_init(&g(1, 0), m_local_n, MPI_FLOAT, m_north_prank, 0,
+                  m_communicator, &requests[2]);
+    MPI_Send_init(&g(m_local_m - 2, 0), m_local_n, MPI_FLOAT, m_south_prank, 0,
+                  m_communicator, &requests[3]);
+  };
+
+  create_persistent_comm(m_grids.old());
+  create_persistent_comm(m_grids.current());
+
+  // Some info if needed to debug
+  // std::cout << m_prank << " " << m_global_m << " " << m_global_n << " "
+  //           << m_local_m << " " << m_local_n << " " << m_offset_m << " "
+  //           << m_offset_n << " " << m_north_prank << " " << m_south_prank
+  //           << std::endl;
+}
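+
+// Layout used by the requests above: row 0 receives the last physical row of
+// the north neighbor and row m_local_m - 1 the first physical row of the
+// south neighbor, while rows 1 and m_local_m - 2 are the ones sent out. Two
+// sets of requests are created, one per grid of the double buffer, because a
+// persistent request is bound to a fixed buffer address; compute_step() then
+// starts the set attached to whichever grid currently plays the role of the
+// old one.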
+
+/* -------------------------------------------------------------------------- */
+void Simulation::set_initial_conditions() {
+  int i_start = (m_prank == 0 ? 0 : 1);
+  int i_end = (m_prank == m_psize - 1 ? m_local_m : m_local_m - 1);
+
+  for (int i = i_start; i < i_end; i++) {
+    for (int j = 0; j < m_local_n; j++) {
+      m_f(i, j) = -2. * 100. * M_PI * M_PI *
+                  std::sin(10. * M_PI * (m_offset_m + i - i_start) * m_h_m) *
+                  std::sin(10. * M_PI * (m_offset_n + j) * m_h_n);
+    }
+  }
+}
+
+/* -------------------------------------------------------------------------- */
+std::tuple<float, int> Simulation::compute() {
+  int s = 0;
+  float l2 = 0;
+  do {
+    l2 = compute_step();
+
+    m_grids.swap();
+
+    // m_dumper->dump(s);
+    ++s;
+  } while (l2 > m_epsilon);
+
+  return std::make_tuple(std::sqrt(l2), s);
+}
+
+/* -------------------------------------------------------------------------- */
+void Simulation::set_epsilon(float epsilon) { m_epsilon = epsilon * epsilon; }
+
+/* -------------------------------------------------------------------------- */
+float Simulation::epsilon() const { return m_epsilon; }
+
+/* -------------------------------------------------------------------------- */
+inline float Simulation::compute_row(int i) {
+  float l2 = 0;
+
+  Grid & u = m_grids.current();
+  Grid & uo = m_grids.old();
+
+  for (int j = 1; j < m_local_n - 1; j++) {
+    // computation of the new step
+    u(i, j) = 0.25 * (uo(i - 1, j) + uo(i + 1, j) + uo(i, j - 1) +
+                      uo(i, j + 1) - m_f(i, j) * m_h_m * m_h_n);
+
+    // L2 norm of the update
+    l2 += (uo(i, j) - u(i, j)) * (uo(i, j) - u(i, j));
+  }
+
+  return l2;
+}
+
+/* -------------------------------------------------------------------------- */
+float Simulation::compute_step() {
+  float l2 = 0.;
+
+  Grid & uo = m_grids.old();
+  auto & requests = uo.getRequests();
+
+  // (re)start the persistent ghost exchange attached to the old grid
+  MPI_Startall(requests.size(), requests.data());
+
+  // computing the inner rows that do not depend on the ghosts
+  for (int i = 2; i < m_local_m - 2; i++) {
+    l2 += compute_row(i);
+  }
+
+  /// wait to receive the ghosts before using them for the computation
+  MPI_Waitall(2, requests.data(), MPI_STATUSES_IGNORE);
+
+  // computing the rows that depend on the ghosts
+  l2 += compute_row(1);
+  l2 += compute_row(m_local_m - 2);
+
+  /// wait for the sends to complete before the buffers are modified
+  MPI_Waitall(2, requests.data() + 2, MPI_STATUSES_IGNORE);
+
+  // Summing the values of all the processors together
+  MPI_Allreduce(MPI_IN_PLACE, &l2, 1, MPI_FLOAT, MPI_SUM, m_communicator);
+
+  return l2;
+}
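double_buffer.hh is included by simulation.hh but is not part of this patch (presumably unchanged from the base exercise). The only property the persistent requests rely on is that swap() exchanges which Grid is current and which is old without touching their storage, so the addresses registered with MPI_Send_init/MPI_Recv_init stay valid. A minimal sketch of such an interface, under that assumption:

#include <memory>
#include <utility>

#include "grid.hh"

class DoubleBuffer {
public:
  DoubleBuffer(int m = 0, int n = 0)
      : m_current(new Grid(m, n)), m_old(new Grid(m, n)) {}

  Grid & current() { return *m_current; }
  Grid & old() { return *m_old; }

  void resize(int m, int n) {
    m_current->resize(m, n);
    m_old->resize(m, n);
  }

  /// swap the roles of the two grids; the Grid objects themselves (and the
  /// buffers the persistent requests point to) are left untouched
  void swap() { std::swap(m_current, m_old); }

private:
  std::unique_ptr<Grid> m_current, m_old;
};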
diff --git a/poisson/solutions-persistent/simulation.hh b/poisson/solutions-persistent/simulation.hh
new file mode 100644
index 0000000..e7d0e41
--- /dev/null
+++ b/poisson/solutions-persistent/simulation.hh
@@ -0,0 +1,78 @@
+#ifndef SIMULATION_HH
+#define SIMULATION_HH
+
+/* -------------------------------------------------------------------------- */
+#include "double_buffer.hh"
+#include "dumpers.hh"
+#include "grid.hh"
+/* -------------------------------------------------------------------------- */
+#include <memory>
+#include <tuple>
+#include <mpi.h>
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+class Simulation {
+public:
+  Simulation(int m, int n, MPI_Comm communicator);
+
+  virtual ~Simulation() = default;
+
+  /// set the initial conditions, Dirichlet and source term
+  virtual void set_initial_conditions();
+
+  /// perform the simulation
+  std::tuple<float, int> compute();
+
+  /// access the precision
+  void set_epsilon(float epsilon);
+  float epsilon() const;
+
+protected:
+  /// compute one step and return an error
+  virtual float compute_step();
+
+  /// compute one row
+  float compute_row(int i);
+
+private:
+  /// Global problem size
+  int m_global_m, m_global_n;
+
+  /// Local problem size
+  int m_local_m, m_local_n;
+
+  /// local offset
+  int m_offset_m, m_offset_n;
+
+  /// local neighbors
+  int m_north_prank, m_south_prank;
+
+  /// proc rank
+  int m_prank;
+
+  /// communicator size
+  int m_psize;
+
+  /// Precision to achieve
+  float m_epsilon;
+
+  /// grid spacing
+  float m_h_m;
+  float m_h_n;
+
+  /// Grids storage
+  DoubleBuffer m_grids;
+
+  /// source term
+  Grid m_f;
+
+  /// Dumper to use for outputs
+  std::unique_ptr<Dumper> m_dumper;
+
+  /// communicator
+  MPI_Comm m_communicator;
+};
+
+#endif /* SIMULATION_HH */
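A note on the conventions above: set_epsilon() stores the square of the tolerance, compute_step() returns the globally reduced sum of squared updates, and compute() therefore iterates until that sum drops below EPSILON squared, returning sqrt(l2) as the residual. Assuming the directory's Makefile (not shown in this patch) builds the `poisson` binary that optim.sub and poisson.sub invoke, a typical interactive run is `mpirun -np 4 ./poisson 512` (or `sbatch poisson.sub` on the cluster); rank 0 then prints, in order, the number of processes, the problem size, the iteration count, the residual and the wall-clock time in seconds.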