Page MenuHomec4science

dgemm.cc
No OneTemporary

File Metadata

Created
Mon, Nov 4, 07:35

dgemm.cc

#include <chrono>
#include <cmath>
#include <cstdio>
#include <omp.h>
#include <vector>
// Clock used to time the benchmark.
using clk = std::chrono::high_resolution_clock;
// Duration with a double representation and the default period (seconds),
// so .count() yields fractional seconds.
using second = std::chrono::duration<double>;
// Time point of `clk`. NOTE(review): not referenced in the visible part of
// this file.
using time_point = std::chrono::time_point<clk>;
int main() {
int N = 2000;
std::vector<double> A(N * N), B(N * N), C(N * N);
for (auto &a : A)
a = 1.0;
for (auto &b : B)
b = 2.0;
for (auto &c : C)
c = 0.0;
auto verif = [&](auto &&A) {
double sum = 0.;
for (auto &a : A)
sum += a;
return sum;
};
auto nthreads = omp_get_max_threads();
auto t1 = clk::now();
#pragma omp parallel for collapse(1) schedule(static, N / nthreads)
for (int i = 0; i < N; ++i)
for (int j = 0; j < N; ++j)
for (int k = 0; k < N; ++k)
C[i * N + j] += A[i * N + k] * B[k * N + j];
second elapsed = clk::now() - t1;
std::printf("DGEMM with %i threads, collapse(1): %g GFLOP/s (verif %g)\n",
nthreads, ((2. * N * N * N) / elapsed.count() / 1e8),
verif(C) / (1. * N * N * N));
return 0;
}

Event Timeline