Page MenuHomec4science

simulation_gpu_loud.cu
No OneTemporary

File Metadata

Created
Sun, Oct 6, 15:26

simulation_gpu_loud.cu

/* -------------------------------------------------------------------------- */
#include "simulation.hh"
#include "grid.hh"
/* -------------------------------------------------------------------------- */
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
/**
 * Jacobi-style update kernel: each thread updates one whole interior row.
 *
 * For every interior column j of its row, a thread writes
 *   u(row, j) = 0.25 * (uo(row-1, j) + uo(row+1, j) + uo(row, j-1)
 *                       + uo(row, j+1) - f(row, j) * h * h)
 *
 * Launch layout: the row index is derived from the y dimension only
 * (blockIdx.y / blockDim.y / threadIdx.y). The x dimension is not used, so
 * threads that differ only in x repeat the same row's work.
 *
 * @param uo grid read as the previous iterate
 * @param u  grid written with the new iterate (rows 1..m-2, cols 1..n-2 only)
 * @param f  grid read as the source term
 * @param h  scalar that multiplies f twice (presumably the mesh spacing —
 *           confirm against the caller)
 *
 * NOTE(review): the Grid objects are received by value; this assumes a copied
 * Grid still refers to device-accessible storage — confirm in grid.hh.
 */
__global__ void compute_step_one_thread_per_row(
    Grid uo, Grid u, Grid f, float h) {
  // The +1 skips boundary row 0; only interior rows are updated.
  int row = blockIdx.y * blockDim.y + threadIdx.y + 1;

  // Threads mapped past the last interior row have nothing to do.
  if (row > u.m() - 2) {
    return;
  }

  for (int j = 1; j < u.n() - 1; j++) {
    // 0.25f (not 0.25) keeps the arithmetic out of double precision; the
    // factor is a power of two, so the stored value is unchanged.
    u(row, j) = 0.25f * (uo(row - 1, j) + uo(row + 1, j) + uo(row, j - 1) +
                         uo(row, j + 1) - f(row, j) * h * h);
  }

  // No __syncthreads() here on purpose: the early return above means not all
  // threads of a block would reach it, and a block-level barrier with no
  // following work orders nothing. The L2 norm of the update is not computed
  // by this kernel.
}
/* -------------------------------------------------------------------------- */
/**
 * Jacobi-style update kernel: each thread updates a single interior entry.
 *
 * The thread mapped to (row_ix, col_ix) writes
 *   u(r, c) = 0.25 * (uo(r-1, c) + uo(r+1, c) + uo(r, c-1) + uo(r, c+1)
 *                     - f(r, c) * h * h)
 *
 * Launch layout: rows come from the y dimension and columns from the x
 * dimension of the 2D launch. Threads falling outside the interior
 * (rows 1..m-2, cols 1..n-2) exit without touching memory.
 *
 * @param uo grid read as the previous iterate
 * @param u  grid written with the new iterate
 * @param f  grid read as the source term
 * @param h  scalar that multiplies f twice (presumably the mesh spacing —
 *           confirm against the caller)
 *
 * NOTE(review): the Grid objects are received by value; this assumes a copied
 * Grid still refers to device-accessible storage — confirm in grid.hh.
 */
__global__ void compute_step_one_thread_per_entry(
    Grid uo, Grid u, Grid f, float h) {
  // The +1 offsets skip boundary row 0 and boundary column 0.
  int row_ix = blockIdx.y * blockDim.y + threadIdx.y + 1;
  int col_ix = blockIdx.x * blockDim.x + threadIdx.x + 1;

  // Threads mapped past the last interior row/column have nothing to do.
  if (row_ix > u.m() - 2 || col_ix > u.n() - 2) {
    return;
  }

  // 0.25f (not 0.25) keeps the arithmetic out of double precision; the factor
  // is a power of two, so the stored value is unchanged.
  u(row_ix, col_ix) =
      0.25f * (uo(row_ix - 1, col_ix) + uo(row_ix + 1, col_ix) +
               uo(row_ix, col_ix - 1) + uo(row_ix, col_ix + 1) -
               f(row_ix, col_ix) * h * h);

  // No __syncthreads() here on purpose: the early return above means not all
  // threads of a block would reach it, and a block-level barrier with no
  // following work orders nothing.
}
/* -------------------------------------------------------------------------- */
/**
 * Launch one update sweep from the old grid into the current grid.
 *
 * The launch covers only the interior of the grid (first/last row and column
 * are not assigned to threads). With PER_ROW defined, the per-row kernel is
 * launched with a single block column; otherwise the per-entry kernel is
 * launched on a 2D grid. Grid dimensions are ceil(interior / block_size) so
 * that a partially filled last block still covers the tail.
 *
 * On the first call only, the block and grid sizes are printed to stdout.
 *
 * @param block_size threads per block in x and y; both components are used as
 *                   divisors and therefore must be non-zero
 * @throws std::runtime_error if the launch is rejected (reported by
 *         cudaGetLastError right after the launch). Faults raised while the
 *         kernel runs are not detected here, since no synchronization is
 *         performed in this function.
 */
void Simulation::compute_step(const dim3 block_size) {
  Grid & u = m_grids.current();
  Grid & uo = m_grids.old();

  int m = u.m() - 2; // interior rows: first & last rows get no thread
  int n = u.n() - 2; // interior columns: first & last columns get no thread

  // A grid without interior points has nothing to update. Launching anyway
  // would request a zero-sized (or wrapped-around) grid dimension and be
  // rejected as an invalid configuration, so treat the step as a no-op.
  if (m <= 0 || n <= 0) {
    return;
  }

  // Positive from here on; do the ceil-division in unsigned like dim3.
  const unsigned int interior_rows = static_cast<unsigned int>(m);
  const unsigned int interior_cols = static_cast<unsigned int>(n);

  dim3 grid_size;
#if defined(PER_ROW)
  // Rows are mapped along y only; one block column is enough.
  (void)interior_cols;
  grid_size.x = 1;
  grid_size.y = (interior_rows + block_size.y - 1) / block_size.y;
#else
  grid_size.x = (interior_cols + block_size.x - 1) / block_size.x;
  grid_size.y = (interior_rows + block_size.y - 1) / block_size.y;
#endif

  // Report the launch configuration once per process.
  static bool first{true};
  if (first) {
    std::cout << "Block size: " << block_size.x << ":" << block_size.y << "\n"
              << "Grid_size: " << grid_size.x << ":" << grid_size.y << std::endl;
    first = false;
  }

#if defined(PER_ROW)
  compute_step_one_thread_per_row<<<grid_size, block_size>>>(uo, u, m_f, m_h_m);
#else
  compute_step_one_thread_per_entry<<<grid_size, block_size>>>(uo, u, m_f, m_h_m);
#endif

  // Kernel launches return no status; launch errors surface here.
  auto error = cudaGetLastError();
  if (error != cudaSuccess) {
    throw std::runtime_error("Error Launching Kernel: "
                             + std::string(cudaGetErrorName(error)) + " - "
                             + std::string(cudaGetErrorString(error)));
  }
}

Event Timeline