diff --git a/poisson/.solutions-persistent/Makefile b/poisson/.solutions-persistent/Makefile
new file mode 100644
index 0000000..c4cbf98
--- /dev/null
+++ b/poisson/.solutions-persistent/Makefile
@@ -0,0 +1,14 @@
+CXX=mpiicpc
+LD=${CXX}
+CXXFLAGS+=-Wall -Wextra -Werror -pedantic -std=c++11 -O3
+LDFLAGS+=-lm $(CXXFLAGS)
+
+OBJS=poisson.o grid.o double_buffer.o simulation.o dumpers.o
+
+all: poisson
+
+poisson: $(OBJS)
+	$(LD) -o $@ $(OBJS) $(LDFLAGS)
+
+clean:
+	rm -f poisson *.o *~
diff --git a/poisson/.solutions-persistent/double_buffer.cc b/poisson/.solutions-persistent/double_buffer.cc
new file mode 100644
index 0000000..e3c560b
--- /dev/null
+++ b/poisson/.solutions-persistent/double_buffer.cc
@@ -0,0 +1,25 @@
+/* -------------------------------------------------------------------------- */
+#include "double_buffer.hh"
+#include "grid.hh"
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+// both grids must live on the same communicator so that they can be swapped
+DoubleBuffer::DoubleBuffer(int m, int n, MPI_Comm communicator)
+    : m_current(new Grid(m, n, communicator)),
+      m_old(new Grid(m, n, communicator)) {}
+
+/* -------------------------------------------------------------------------- */
+Grid & DoubleBuffer::current() { return *m_current; }
+
+/* -------------------------------------------------------------------------- */
+Grid & DoubleBuffer::old() { return *m_old; }
+
+/* -------------------------------------------------------------------------- */
+void DoubleBuffer::swap() {
+  m_current.swap(m_old);
+}
+
+/* -------------------------------------------------------------------------- */
+void DoubleBuffer::resize(int m, int n) {
+  m_current->resize(m, n);
+  m_old->resize(m, n);
+}
diff --git a/poisson/.solutions-persistent/double_buffer.hh b/poisson/.solutions-persistent/double_buffer.hh
new file mode 100644
index 0000000..b524271
--- /dev/null
+++ b/poisson/.solutions-persistent/double_buffer.hh
@@ -0,0 +1,26 @@
+#ifndef DOUBLE_BUFFER
+#define DOUBLE_BUFFER
+
+/* -------------------------------------------------------------------------- */
+#include <memory>
+#include <mpi.h>
+/* -------------------------------------------------------------------------- */
+#include "grid.hh"
+/* -------------------------------------------------------------------------- */
+
+class DoubleBuffer {
+public:
+  DoubleBuffer(int m = 0, int n = 0, MPI_Comm communicator = MPI_COMM_WORLD);
+
+  void resize(int m, int n);
+  Grid & current();
+  Grid & old();
+
+  void swap();
+
+private:
+  std::unique_ptr<Grid> m_current;
+  std::unique_ptr<Grid> m_old;
+};
+
+#endif /* DOUBLE_BUFFER */
diff --git a/poisson/.solutions-persistent/dumpers.cc b/poisson/.solutions-persistent/dumpers.cc
new file mode 100644
index 0000000..3dc9abb
--- /dev/null
+++ b/poisson/.solutions-persistent/dumpers.cc
@@ -0,0 +1,160 @@
+/* -------------------------------------------------------------------------- */
+#include "dumpers.hh"
+#include "grid.hh"
+/* -------------------------------------------------------------------------- */
+#include <algorithm>
+#include <array>
+#include <fstream>
+#include <iomanip>
+#include <sstream>
+#include <vector>
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+void Dumper::set_min(float min) { m_min = min; }
+
+void Dumper::set_max(float max) { m_max = max; }
+
+float Dumper::min() const { return m_min; }
+
+float Dumper::max() const { return m_max; }
+
+/* -------------------------------------------------------------------------- */
+void DumperASCII::dump(int step) {
+  std::ofstream fout;
+  std::stringstream sfilename;
+
+  sfilename << "out_" << std::setfill('0') << std::setw(5) << step << ".pgm";
+  fout.open(sfilename.str());
+
+  int m = m_grid.m();
+  int n = m_grid.n();
+
+  fout << "P2" << std::endl << "# CREATOR: Poisson program" << std::endl;
+  // the PGM header stores the width (n columns) before the height (m rows)
+  fout << n << " " << m << std::endl;
+  fout << 255 << std::endl;
+
+  for (int i = 0; i < m; i++) {
+    for (int j = 0; j < n; j++) {
+      int v = 255. * (m_grid(i, j) - m_min) / (m_max - m_min);
+      v = std::max(0, std::min(v, 255));
+      fout << v << std::endl;
+    }
+  }
+}
+
+/* -------------------------------------------------------------------------- */
+void DumperBinary::dump(int step) {
+  std::ofstream fout;
+  std::stringstream sfilename;
+
+  int prank, psize;
+  MPI_Comm_rank(m_communicator, &prank);
+  MPI_Comm_size(m_communicator, &psize);
+
+  sfilename << "out_" << std::setfill('0') << std::setw(5) << step << ".bmp";
+  fout.open(sfilename.str(), std::ios_base::binary);
+
+  int h = m_grid.m();
+  int w = m_grid.n();
+
+  // removing the ghost lines from the height
+  if (psize > 1)
+    h = (prank == 0 || prank == psize - 1 ? h - 1 : h - 2);
+  int start = (prank == 0 ? 0 : 1);
+
+  // gathering the local height of every processor; this could instead be done
+  // once, as in the constructor of the Simulation
+  std::vector<int> size_per_proc(psize);
+  MPI_Allgather(&h, 1, MPI_INT, size_per_proc.data(), 1, MPI_INT,
+                m_communicator);
+
+  // determining the local offset
+  int offset_h = 0;
+  for (int i = 0; i < prank; ++i) {
+    offset_h += size_per_proc[i];
+  }
+
+  int total_h = offset_h;
+  for (int i = prank; i < psize; ++i) {
+    total_h += size_per_proc[i];
+  }
+
+  // gathering all the data on the root processor
+  std::vector<float> buffer;
+  if (prank == 0) {
+    std::vector<int> displs(psize + 1);
+    displs[0] = 0;
+    for (int i = 0; i < psize; ++i) {
+      size_per_proc[i] *= w;
+      displs[i + 1] = displs[i] + size_per_proc[i];
+    }
+
+    buffer.resize(displs[psize]);
+
+    MPI_Gatherv(&m_grid(start, 0), h * w, MPI_FLOAT, buffer.data(),
+                size_per_proc.data(), displs.data(), MPI_FLOAT, 0,
+                m_communicator);
+  } else {
+    MPI_Gatherv(&m_grid(start, 0), h * w, MPI_FLOAT, NULL, NULL, NULL,
+                MPI_FLOAT, 0, m_communicator);
+
+    return;
+  }
+
+  // only processor 0 continues from here
+
+  int row_size = 3 * w;
+  // if the row size (3*w) is not a multiple of 4, add enough padding bytes to
+  // make it one, as required by the BMP format
+  int padding = (4 - (row_size) % 4) % 4;
+  row_size += padding;
+
+  int filesize = 54 + (row_size)*total_h;
+
+  std::vector<char> img(row_size * total_h);
+  std::fill(img.begin(), img.end(), 0);
+
+  for (int i = 0; i < total_h; i++) {
+    for (int j = 0; j < w; j++) {
+      float v = ((buffer[(total_h - 1 - i)*w + j] - m_min) / (m_max - m_min));
+
+      float r = v * 255; // Red channel
+      float g = v * 255; // Green channel
+      float b = v * 255; // Blue channel
+
+      r = std::min(r, 255.f);
+      g = std::min(g, 255.f);
+      b = std::min(b, 255.f);
+
+      img[row_size * i + 3 * j + 2] = r;
+      img[row_size * i + 3 * j + 1] = g;
+      img[row_size * i + 3 * j + 0] = b;
+    }
+  }
+
+  std::array<char, 14> bmpfileheader = {'B', 'M', 0, 0, 0, 0, 0,
+                                        0,   0,   0, 54, 0, 0, 0};
+  std::array<char, 40> bmpinfoheader = {40, 0, 0, 0, 0, 0, 0,  0,
+                                        0,  0, 0, 0, 1, 0, 24, 0};
+
+  bmpfileheader[2] = filesize;
+  bmpfileheader[3] = filesize >> 8;
+  bmpfileheader[4] = filesize >> 16;
+  bmpfileheader[5] = filesize >> 24;
+
+  bmpinfoheader[4] = w;
+  bmpinfoheader[5] = w >> 8;
+  bmpinfoheader[6] = w >> 16;
+  bmpinfoheader[7] = w >> 24;
+  bmpinfoheader[8] = total_h;
+  bmpinfoheader[9] = total_h >> 8;
+  bmpinfoheader[10] = total_h >> 16;
+  bmpinfoheader[11] = total_h >> 24;
+  bmpinfoheader[20] = (filesize - 54);
+  bmpinfoheader[21] = (filesize - 54) >> 8;
+  bmpinfoheader[22] = (filesize - 54) >> 16;
+  bmpinfoheader[23] = (filesize - 54) >> 24;
+
+  fout.write(bmpfileheader.data(), 14);
+  fout.write(bmpinfoheader.data(), 40);
+
+  fout.write(img.data(), total_h * row_size);
+}
diff --git a/poisson/.solutions-persistent/dumpers.hh b/poisson/.solutions-persistent/dumpers.hh
new file mode 100644
index 0000000..27a5f12
--- /dev/null
+++ b/poisson/.solutions-persistent/dumpers.hh
@@ -0,0 +1,48 @@
+#ifndef DUMPERS_HH
+#define DUMPERS_HH
+
+/* -------------------------------------------------------------------------- */
+#include <mpi.h>
+/* -------------------------------------------------------------------------- */
+class Grid;
+
+/* -------------------------------------------------------------------------- */
+class Dumper {
+public:
+  explicit Dumper(const Grid & grid, MPI_Comm communicator)
+      : m_grid(grid), m_min(-1.), m_max(1.), m_communicator(communicator) {}
+
+  // virtual destructor: Dumpers are deleted through a pointer to the base
+  virtual ~Dumper() = default;
+
+  virtual void dump(int step) = 0;
+
+  void set_min(float min);
+  void set_max(float max);
+
+  float min() const;
+  float max() const;
+
+protected:
+  const Grid & m_grid;
+  float m_min, m_max;
+
+  MPI_Comm m_communicator;
+};
+
+/* -------------------------------------------------------------------------- */
+class DumperASCII : public Dumper {
+public:
+  explicit DumperASCII(const Grid & grid, MPI_Comm communicator)
+      : Dumper(grid, communicator) {}
+
+  virtual void dump(int step);
+};
+
+/* -------------------------------------------------------------------------- */
+class DumperBinary : public Dumper {
+public:
+  explicit DumperBinary(const Grid & grid, MPI_Comm communicator)
+      : Dumper(grid, communicator) {}
+
+  virtual void dump(int step);
+};
+
+#endif /* DUMPERS_HH */
diff --git a/poisson/.solutions-persistent/grid.cc b/poisson/.solutions-persistent/grid.cc
new file mode 100644
index 0000000..b4c8ebd
--- /dev/null
+++ b/poisson/.solutions-persistent/grid.cc
@@ -0,0 +1,49 @@
+/* -------------------------------------------------------------------------- */
+#include "grid.hh"
+/* -------------------------------------------------------------------------- */
+#include <algorithm>
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+Grid::Grid(int m, int n, MPI_Comm communicator) :
+    m_m(m), m_n(n), m_storage(m * n), m_communicator(communicator) {
+  clear();
+
+  // retrieving the number of procs and the rank in the proc pool
+  MPI_Comm_rank(m_communicator, &m_prank);
+  MPI_Comm_size(m_communicator, &m_psize);
+
+  // determining the ranks of the neighbors
+  m_north_prank = (m_prank == 0 ? MPI_PROC_NULL : m_prank - 1);
+  m_south_prank = (m_prank == (m_psize - 1) ? MPI_PROC_NULL : m_prank + 1);
+}
+
+/* -------------------------------------------------------------------------- */
+void Grid::clear() { std::fill(m_storage.begin(), m_storage.end(), 0.); }
+
+/* -------------------------------------------------------------------------- */
+void Grid::resize(int m, int n) {
+  m_m = m;
+  m_n = n;
+  // NB: this may reallocate the storage, so init_communications() has to be
+  // called again afterwards to re-create the persistent requests
+  m_storage.resize(m * n);
+}
+
+/* -------------------------------------------------------------------------- */
+void Grid::init_communications() {
+  // posting the receive requests (into the ghost lines)
+  MPI_Recv_init(&(*this)(0, 0), m_n, MPI_FLOAT, m_north_prank, 0,
+                m_communicator, &m_requests[0]);
+  MPI_Recv_init(&(*this)(m_m - 1, 0), m_n, MPI_FLOAT, m_south_prank, 0,
+                m_communicator, &m_requests[1]);
+
+  // posting the send requests (first and last non-ghost lines)
+  MPI_Send_init(&(*this)(1, 0), m_n, MPI_FLOAT, m_north_prank, 0,
+                m_communicator, &m_requests[2]);
+  MPI_Send_init(&(*this)(m_m - 2, 0), m_n, MPI_FLOAT, m_south_prank, 0,
+                m_communicator, &m_requests[3]);
+}
+
+/* -------------------------------------------------------------------------- */
+int Grid::m() const { return m_m; }
+int Grid::n() const { return m_n; }
+
+/* -------------------------------------------------------------------------- */
diff --git a/poisson/.solutions-persistent/grid.hh b/poisson/.solutions-persistent/grid.hh
new file mode 100644
index 0000000..36c7336
--- /dev/null
+++ b/poisson/.solutions-persistent/grid.hh
@@ -0,0 +1,60 @@
+#ifndef GRID_HH
+#define GRID_HH
+
+/* -------------------------------------------------------------------------- */
+#include <array>
+#include <vector>
+#include <mpi.h>
+/* -------------------------------------------------------------------------- */
+
+class Grid {
+public:
+  Grid(int m = 0, int n = 0, MPI_Comm communicator = MPI_COMM_WORLD);
+
+  /// access the value [i][j] of the grid
+  inline float & operator()(int i, int j) { return m_storage[i * m_n + j]; }
+  inline const float & operator()(int i, int j) const {
+    return m_storage[i * m_n + j];
+  }
+
+  void resize(int m, int n);
+
+  /// start the persistent sends/receives of the ghost lines
+  void start_all_communications() {
+    MPI_Startall(4, m_requests.data());
+  }
+
+  void wait_recv() {
+    MPI_Waitall(2, m_requests.data(), MPI_STATUSES_IGNORE);
+  }
+
+  void wait_send() {
+    MPI_Waitall(2, m_requests.data() + 2, MPI_STATUSES_IGNORE);
+  }
+
+  void init_communications();
+
+  /// set the grid to 0
+  void clear();
+
+  int m() const;
+  int n() const;
+
+private:
+  int m_m, m_n;
+  std::vector<float> m_storage;
+  MPI_Comm m_communicator;
+
+  /// local neighbors
+  int m_north_prank, m_south_prank;
+
+  /// proc rank
+  int m_prank;
+
+  /// communicator size
+  int m_psize;
+
+  /// requests for the persistent communications
+  std::array<MPI_Request, 4> m_requests;
+};
+
+#endif /* GRID_HH */
diff --git a/poisson/.solutions-persistent/optim.sub b/poisson/.solutions-persistent/optim.sub
new file mode 100644
index 0000000..3e4440e
--- /dev/null
+++ b/poisson/.solutions-persistent/optim.sub
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+##SBATCH --reservation=phpc2017
+##SBATCH --account=phpc2017
+#SBATCH -N 1
+#SBATCH -n 1
+
+OPTIMS="-O0 -O1 -O2 -O3"
+N=512
+
+module load gcc
+
+echo gcc > gcc.out
+for _opt in ${OPTIMS} '-O3 -ftree-vectorize'
+do
+    make clean
+    make CXXFLAGS="-std=c++11 ${_opt}" CXX=g++
+    ./poisson $N | cut -f4 -d " " >> gcc.out
+done
+
+module load intel
+
+echo icpc > intel.out
+for _opt in ${OPTIMS} '-O3 -xHOST'
+do
+    make clean
+    make CXXFLAGS="-std=c++11 ${_opt}" CXX=icpc
+    ./poisson $N | cut -f4 -d " " >> intel.out
+done
+
+echo " ${OPTIMS} specific" | tr " " "\n" > header
+
+paste header gcc.out intel.out > compilers.out
diff --git a/poisson/.solutions-persistent/poisson.cc b/poisson/.solutions-persistent/poisson.cc
new file mode 100644
index 0000000..f8e402d
--- /dev/null
+++ b/poisson/.solutions-persistent/poisson.cc
@@ -0,0 +1,63 @@
+/* -------------------------------------------------------------------------- */
+#include "simulation.hh"
+/* -------------------------------------------------------------------------- */
+#include <chrono>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <tuple>
+/* -------------------------------------------------------------------------- */
+#include <cstdlib>
+#include <mpi.h>
+/* -------------------------------------------------------------------------- */
+
+#define EPSILON 0.005
+
+typedef std::chrono::high_resolution_clock clk;
+typedef std::chrono::duration<float> second;
+
+static void usage(const std::string & prog_name) {
+  std::cerr << prog_name << " <grid_size>" << std::endl;
+  exit(0);
+}
+
+int main(int argc, char * argv[]) {
+  MPI_Init(&argc, &argv);
+  int prank, psize;
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &prank);
+  MPI_Comm_size(MPI_COMM_WORLD, &psize);
+
+  if (argc != 2) usage(argv[0]);
+
+  std::stringstream args(argv[1]);
+  int N;
+  args >> N;
+
+  if (args.fail()) usage(argv[0]);
+
+  Simulation simu(N, N, MPI_COMM_WORLD);
+
+  simu.set_initial_conditions();
+
+  simu.set_epsilon(EPSILON);
+
+  float l2;
+  int k;
+
+  auto start = clk::now();
+  std::tie(l2, k) = simu.compute();
+  auto end = clk::now();
+
+  second time = end - start;
+
+  // output columns: psize N iterations l2 time
+  if (prank == 0)
+    std::cout << psize << " " << N << " "
+              << k << " " << std::scientific << l2 << " "
+              << time.count() << std::endl;
+
+  MPI_Finalize();
+
+  return 0;
+}
diff --git a/poisson/.solutions-persistent/poisson.sub b/poisson/.solutions-persistent/poisson.sub
new file mode 100644
index 0000000..aa43198
--- /dev/null
+++ b/poisson/.solutions-persistent/poisson.sub
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+##SBATCH --reservation=phpc2017
+##SBATCH --account=phpc2017
+#SBATCH -N 1
+#SBATCH -n 1
+
+module load gcc
+
+rm -f complexity.dat
+
+for i in 128 256 512 1024
+do
+    ./poisson $i >> complexity.dat
+done
+
+slmodules -r deprecated
+module load gnuplot
+
+# plot of runtime vs. size (complexity.dat columns: psize N iterations l2 time)
+gnuplot <<EOF
+set terminal png
+set output "complexity.png"
+set logscale xy
+plot 'complexity.dat' using 2:5 with linespoints title 'time'
+EOF
diff --git a/poisson/.solutions-persistent/simulation.cc b/poisson/.solutions-persistent/simulation.cc
new file mode 100644
--- /dev/null
+++ b/poisson/.solutions-persistent/simulation.cc
@@ -0,0 +1,135 @@
+/* -------------------------------------------------------------------------- */
+#include "simulation.hh"
+/* -------------------------------------------------------------------------- */
+#include <cmath>
+#include <iostream>
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+Simulation::Simulation(int m, int n, MPI_Comm communicator)
+    : m_global_m(m), m_global_n(n), m_epsilon(1e-7), m_h_m(1. / m),
+      m_h_n(1. / n), m_grids(0, 0, communicator), m_f(0, 0, communicator),
+      m_dumper(new DumperBinary(m_grids.old(), communicator)),
+      m_communicator(communicator) {
+
+  // retrieving the number of procs and the rank in the proc pool
+  MPI_Comm_rank(m_communicator, &m_prank);
+  MPI_Comm_size(m_communicator, &m_psize);
+
+  // computation of the local size of the grid; the remainder is spread
+  // equally over the first processors
+  m_local_m = m / m_psize + (m_prank < m % m_psize ? 1 : 0);
+  m_local_n = n;
+
+  // adding the ghost lines if needed
+  if (m_psize > 1)
+    m_local_m += (m_prank == 0 || m_prank == m_psize - 1) ? 1 : 2;
+
+  // computing the offsets of the local grid in the global one
+  m_offset_m =
+      (m / m_psize) * m_prank + (m_prank < m % m_psize ? m_prank : m % m_psize);
+  m_offset_n = 0;
+
+  // resizing the different grids
+  m_grids.resize(m_local_m, m_local_n);
+  m_f.resize(m_local_m, m_local_n);
+
+  // the persistent requests are created once the grids have their final size
+  m_grids.old().init_communications();
+  m_grids.current().init_communications();
+
+  // determining the ranks of the neighbors
+  m_north_prank = (m_prank == 0 ? MPI_PROC_NULL : m_prank - 1);
+  m_south_prank = (m_prank == (m_psize - 1) ? MPI_PROC_NULL : m_prank + 1);
+
+  // Some info if needed to debug
+  // std::cout << m_prank << " " << m_global_m << " " << m_global_n << " "
+  //           << m_local_m << " " << m_local_n << " " << m_offset_m << " "
+  //           << m_offset_n << " " << m_north_prank << " " << m_south_prank
+  //           << std::endl;
+}
+
+/* -------------------------------------------------------------------------- */
+void Simulation::set_initial_conditions() {
+  int i_start = (m_prank == 0 ? 0 : 1);
+  int i_end = (m_prank == m_psize - 1 ? m_local_m : m_local_m - 1);
+
+  for (int i = i_start; i < i_end; i++) {
+    for (int j = 0; j < m_local_n; j++) {
+      m_f(i, j) = -2. * 100. * M_PI * M_PI *
+                  std::sin(10. * M_PI * (m_offset_m + i - i_start) * m_h_m) *
+                  std::sin(10. * M_PI * (m_offset_n + j) * m_h_n);
+    }
+  }
+}
+
+/* -------------------------------------------------------------------------- */
+std::tuple<float, int> Simulation::compute() {
+  int s = 0;
+  float l2 = 0;
+  do {
+    l2 = compute_step();
+
+    m_grids.swap();
+
+    // m_dumper->dump(s);
+    ++s;
+  } while (l2 > m_epsilon);
+
+  return std::make_tuple(std::sqrt(l2), s);
+}
+
+/* -------------------------------------------------------------------------- */
+void Simulation::set_epsilon(float epsilon) { m_epsilon = epsilon * epsilon; }
+
+/* -------------------------------------------------------------------------- */
+float Simulation::epsilon() const { return m_epsilon; }
+
+/* -------------------------------------------------------------------------- */
+inline float Simulation::compute_row(int i) {
+  float l2 = 0;
+
+  Grid & u = m_grids.current();
+  Grid & uo = m_grids.old();
+
+  for (int j = 1; j < m_local_n - 1; j++) {
+    // computation of the new step
+    u(i, j) = 0.25 * (uo(i - 1, j) + uo(i + 1, j) + uo(i, j - 1) +
+                      uo(i, j + 1) - m_f(i, j) * m_h_m * m_h_n);
+
+    // L2 norm
+    l2 += (uo(i, j) - u(i, j)) * (uo(i, j) - u(i, j));
+  }
+
+  return l2;
+}
+
+/* -------------------------------------------------------------------------- */
+float Simulation::compute_step() {
+  float l2 = 0.;
+
+  Grid & uo = m_grids.old();
+
+  uo.start_all_communications();
+
+  // computing the inner rows that do not depend on the ghosts
+  for (int i = 2; i < m_local_m - 2; i++) {
+    l2 += compute_row(i);
+  }
+
+  /// wait to receive the ghosts before using them in the computation
+  uo.wait_recv();
+
+  // computing the rows that depend on the ghosts
+  l2 += compute_row(1);
+  l2 += compute_row(m_local_m - 2);
+
+  /// wait for the sends to complete before the buffers are swapped
+  uo.wait_send();
+
+  // summing the contributions of all the processors
+  MPI_Allreduce(MPI_IN_PLACE, &l2, 1, MPI_FLOAT, MPI_SUM, m_communicator);
+
+  return l2;
+}
diff --git a/poisson/.solutions-persistent/simulation.hh b/poisson/.solutions-persistent/simulation.hh
new file mode 100644
index 0000000..1dfe321
--- /dev/null
+++ b/poisson/.solutions-persistent/simulation.hh
@@ -0,0 +1,76 @@
+#ifndef SIMULATION_HH
+#define SIMULATION_HH
+
+/* -------------------------------------------------------------------------- */
+#include "double_buffer.hh"
+#include "dumpers.hh"
+#include "grid.hh"
+/* -------------------------------------------------------------------------- */
+#include <memory>
+#include <tuple>
+#include <mpi.h>
+/* -------------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------------- */
+class Simulation {
+public:
+  Simulation(int m, int n, MPI_Comm communicator);
+
+  /// set the initial conditions, Dirichlet and source term
+  virtual void set_initial_conditions();
+
+  /// perform the simulation; returns the final error and the iteration count
+  std::tuple<float, int> compute();
+
+  /// access the target precision
+  void set_epsilon(float epsilon);
+  float epsilon() const;
+
+protected:
+  /// compute one step and return the squared L2 norm of the update
+  virtual float compute_step();
+
+  /// compute one row
+  float compute_row(int i);
+
+private:
+  /// Global problem size
+  int m_global_m, m_global_n;
+
+  /// Local problem size
+  int m_local_m, m_local_n;
+
+  /// local offset
+  int m_offset_m, m_offset_n;
+
+  /// local neighbors
+  int m_north_prank, m_south_prank;
+
+  /// proc rank
+  int m_prank;
+
+  /// communicator size
+  int m_psize;
+
+  /// Precision to achieve
+  float m_epsilon;
+
+  /// grid spacing
+  float m_h_m;
+  float m_h_n;
+
+  /// Grids storage
+  DoubleBuffer m_grids;
+
+  /// source term
+  Grid m_f;
+
+  /// Dumper to use for outputs
+  std::unique_ptr<Dumper> m_dumper;
+
+  /// communicator
+  MPI_Comm m_communicator;
+};
+
+#endif /* SIMULATION_HH */
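
For reference, a minimal sketch of how the solution can be built and exercised outside the batch system (the executable name and its single argument come from the Makefile and poisson.cc above; the `mpirun` launcher name and the rank count of 4 are assumptions that depend on the local MPI installation):

    # build with the Makefile above (uses mpiicpc; e.g. CXX=mpic++ for OpenMPI)
    make
    # run the solver on a 512x512 grid with 4 MPI ranks;
    # prints one line: psize N iterations l2 time
    mpirun -np 4 ./poisson 512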