Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90737053
dgemm.cc
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Nov 4, 07:35
Size
1 KB
Mime Type
text/x-c
Expires
Wed, Nov 6, 07:35 (2 d)
Engine
blob
Format
Raw Data
Handle
22126952
Attached To
R11821 phys-743-lecture
dgemm.cc
View Options
#include <chrono>
#include <cmath>
#include <cstdio>
#include <omp.h>
#include <vector>
// Clock used to take the timestamps for the benchmark.
using clk = std::chrono::high_resolution_clock;
// Duration expressed in seconds with a double-precision tick count.
using second = std::chrono::duration<double>;
// A point in time as measured by `clk`.
using time_point = std::chrono::time_point<clk>;
/// Naive dense matrix-matrix multiply benchmark (C += A * B) parallelised
/// with OpenMP over the rows of C.
///
/// A is filled with 1.0 and B with 2.0, so every entry of C ends up equal to
/// 2 * N and the sum of all entries is 2 * N^3. The printed "verif" value is
/// that sum divided by N^3 and is therefore expected to be 2.
///
/// @return 0 on completion.
int main() {
  const int N = 2000;

  // Row-major N x N matrices, filled directly by the vector constructor.
  std::vector<double> A(N * N, 1.0), B(N * N, 2.0), C(N * N, 0.0);

  // Sum of all entries of a matrix; used as a cheap correctness check.
  auto verif = [](const auto &matrix) {
    double sum = 0.;
    for (const auto &value : matrix)
      sum += value;
    return sum;
  };

  const int nthreads = omp_get_max_threads();

  auto t1 = clk::now();

  // Plain schedule(static) lets the runtime split the rows into near-equal
  // chunks, at most one per thread. An explicit chunk of N / nthreads would
  // be 0 (not allowed) when nthreads > N, and would leave a remainder that is
  // handed out as an extra round when N is not a multiple of nthreads.
  // Only the outer loop is distributed, so each C[i * N + j] is written by
  // exactly one thread.
#pragma omp parallel for collapse(1) schedule(static)
  for (int i = 0; i < N; ++i)
    for (int j = 0; j < N; ++j)
      for (int k = 0; k < N; ++k)
        C[i * N + j] += A[i * N + k] * B[k * N + j];

  second elapsed = clk::now() - t1;

  // One multiply and one add per innermost iteration: 2 * N^3 floating-point
  // operations in total. Divide by 1e9 to report GFLOP/s. The leading `2.`
  // and `1.` keep the N * N * N products in double so they do not overflow int.
  std::printf("DGEMM with %i threads, collapse(1): %g GFLOP/s (verif %g)\n",
              nthreads, ((2. * N * N * N) / elapsed.count() / 1e9),
              verif(C) / (1. * N * N * N));

  return 0;
}
Event Timeline
Log In to Comment