Page MenuHomec4science

cg_main.cu
No OneTemporary

File Metadata

Created
Sun, Sep 1, 08:20

cg_main.cu

#include "cg.hh"
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
#include <cuda_runtime.h>
// Clock used to take the wall-clock timestamps around the solve in main().
using clk = std::chrono::high_resolution_clock;
// Duration with a double-precision tick count and the default period (seconds).
using second = std::chrono::duration<double>;
// Time point measured on clk.
using time_point = std::chrono::time_point<clk>;
// When true, main() prints extra diagnostic output (device name, matrix size,
// human-readable timing) in addition to the final result line.
const bool DEBUG = false;
/*
  Print the expected command line to stderr and terminate the program.

  prog_name: name under which the executable was invoked (argv[0]).

  This function never returns. It is only called when the command-line
  arguments are missing or invalid, so the process exits with a failure
  status to let calling scripts detect the problem.
*/
[[noreturn]] static void usage(const std::string & prog_name) {
  std::cerr << prog_name << " <block_size.x> <path/to/matrix.mtx>" << std::endl;
  std::exit(EXIT_FAILURE);
}
/*
  Driver for a simple conjugate-gradient (CG) solver.

  The matrix is read from a file in the Matrix Market (.mtx) format, so any
  matrix available in that format can be used to test the code.

  Command line: <prog> <block_size.x> <path/to/matrix.mtx>
    block_size.x       number of CUDA threads per block, between 1 and 1024
    path/to/matrix.mtx file passed to CGSolver::read_matrix

  On success, one line "<numBlocks> <threadsPerBlock> <elapsed seconds>" is
  written to stdout and 0 is returned. Invalid arguments end the program
  through usage(). A CUDA runtime error is reported on stderr and its error
  code is returned; a device in prohibited compute mode or an empty matrix
  returns -1.
*/
int main(int argc, char ** argv) {
  // -------------------------
  // Read execution parameters
  // -------------------------
  if (argc < 3) usage(argv[0]);

  // Initialised so the value is well defined on every path.
  int threadsPerBlock = 0;
  try {
    threadsPerBlock = std::stoi(argv[1]);
  } catch (const std::invalid_argument &) {
    // argv[1] is not a number
    usage(argv[0]);
  } catch (const std::out_of_range &) {
    // argv[1] is a number that does not fit in an int
    usage(argv[0]);
  }

  if (threadsPerBlock < 1 || threadsPerBlock > 1024) {
    // Diagnostics go to stderr; stdout only carries the result line.
    std::cerr << "CUDA block size must be between 1 and 1024 (inclusive)"
              << std::endl;
    usage(argv[0]);
  }

  CGSolver solver;
  try {
    solver.read_matrix(argv[2]);
  } catch (const std::invalid_argument &) {
    usage(argv[0]);
  }

  // ---------------------------
  // CUDA error handling & setup
  // ---------------------------
  // dev_id starts at 0 and is overwritten with the current device.
  int dev_id = 0;
  cudaDeviceProp device_prop;

  // Check every return code before device_prop is read: if the query fails,
  // the structure is not filled in.
  cudaError_t error = cudaGetDevice(&dev_id);
  if (error == cudaSuccess) {
    error = cudaGetDeviceProperties(&device_prop, dev_id);
  }
  if (error == cudaSuccess) {
    // Also pick up any error left pending by earlier runtime calls.
    error = cudaGetLastError();
  }
  if (error != cudaSuccess) {
    std::cerr << "CUDA device setup returned error code " << error << " ("
              << cudaGetErrorString(error) << "), line(" << __LINE__ << ")"
              << std::endl;
    return error;
  }

  if (device_prop.computeMode == cudaComputeModeProhibited) {
    std::cerr << "Error: device is running in <Compute Mode Prohibited>, no "
                 "threads can use ::cudaSetDevice()"
              << std::endl;
    return -1;
  }

  if (DEBUG) {
    std::cout << "GPU Device " << dev_id << ": \"" << device_prop.name
              << "\" with compute capability " << device_prop.major << "."
              << device_prop.minor << std::endl;
  }

  // -----------------------
  // Program execution begin
  // -----------------------
  int n = solver.n();
  int m = solver.m();
  if (n <= 0 || m <= 0) {
    // n == 0 would divide by zero below and m == 0 would give an empty grid,
    // which is not a valid launch configuration.
    std::cerr << "Error: matrix has invalid dimensions " << m << " x " << n
              << std::endl;
    return -1;
  }
  double h = 1. / n;

  // Ceiling division written without the "+ threadsPerBlock - 1" form so it
  // cannot overflow for large m.
  int numBlocks = m / threadsPerBlock + (m % threadsPerBlock != 0 ? 1 : 0);
  dim3 grid_size(numBlocks);
  dim3 block_size(threadsPerBlock);

  solver.setAccelerationParams(grid_size, block_size);
  solver.init_source_term(h);

  // Initial guess: the zero vector.
  std::vector<double> x_d(n, 0.);

  if (DEBUG) {
    std::cout << "Call CG dense on matrix size (" << m << " x " << n << ")"
              << std::endl;
  }

  auto t1 = clk::now();
  solver.solve(x_d);
  // NOTE(review): solve() presumably launches GPU work; kernel launches are
  // asynchronous, so wait for the device before stopping the clock. This also
  // surfaces errors raised while the kernels were running.
  error = cudaDeviceSynchronize();
  second elapsed = clk::now() - t1;
  if (error != cudaSuccess) {
    std::cerr << "CUDA error during solve: code " << error << " ("
              << cudaGetErrorString(error) << "), line(" << __LINE__ << ")"
              << std::endl;
    return error;
  }

  if (DEBUG)
    std::cout << "Time for CG (dense solver) = " << elapsed.count() << " [s]\n";

  // Result line: "<numBlocks> <threadsPerBlock> <elapsed seconds>"
  std::cout << numBlocks << " " << threadsPerBlock << " " << elapsed.count() << std::endl;
  return 0;
}

Event Timeline