File Metadata

Created: Sun, Oct 6, 15:26

simulation_gpu_loud.cu
View Options

	/* -------------------------------------------------------------------------- */
	#include "simulation.hh"
	#include "grid.hh"
	/* -------------------------------------------------------------------------- */
	#include <exception>
	#include <iostream>
	#include <stdexcept>
	#include <string>
	/* -------------------------------------------------------------------------- */

	/* -------------------------------------------------------------------------- */
	// Jacobi-style update in which every thread sweeps one complete interior row.
	//
	// Launch layout: the row index is built from the y components of the block
	// and thread indices only. The launch is therefore expected to be one thread
	// wide in x; threads that differ only in x would recompute the same row.
	// No shared memory is used.
	//
	//   uo - grid that is read (values of the previous step)
	//   u  - grid that is written; only entries with 1 <= row <= m-2 and
	//        1 <= column <= n-2 are touched
	//   f  - grid sampled at the same (row, column) as the entry being written
	//   h  - scalar that multiplies f as h * h (presumably the mesh spacing)
	//
	// The L2 norm of the update is not accumulated by this kernel.
	__global__ void compute_step_one_thread_per_row(
	    Grid uo, Grid u, Grid f, float h) {
	  // The +1 skips row 0, which is never written.
	  const int row = blockIdx.y * blockDim.y + threadIdx.y + 1;

	  // Threads mapped beyond the last interior row have nothing to do.
	  if (row > u.m() - 2) {
	    return;
	  }

	  for (int j = 1; j < u.n() - 1; j++) {
	    // Four-neighbour stencil. The float literal keeps the expression from
	    // being promoted to double; scaling by 0.25 is exact either way.
	    u(row, j) = 0.25f * (uo(row - 1, j) + uo(row + 1, j) + uo(row, j - 1) +
	                         uo(row, j + 1) - f(row, j) * h * h);
	  }

	  // Deliberately no __syncthreads() here: threads that returned early above
	  // would never reach it, and a block-level barrier at the very end of a
	  // kernel that shares no data between threads orders nothing.
	}



	/* -------------------------------------------------------------------------- */
	// Jacobi-style update in which every thread computes one interior entry.
	//
	// Launch layout: 2D. The x components of the block and thread indices select
	// the column, the y components select the row. The launch must cover at
	// least (n-2) threads in x and (m-2) threads in y; surplus threads exit
	// immediately. No shared memory is used.
	//
	//   uo - grid that is read (values of the previous step)
	//   u  - grid that is written; only entries with 1 <= row <= m-2 and
	//        1 <= column <= n-2 are touched
	//   f  - grid sampled at the same (row, column) as the entry being written
	//   h  - scalar that multiplies f as h * h (presumably the mesh spacing)
	__global__ void compute_step_one_thread_per_entry(
	    Grid uo, Grid u, Grid f, float h) {
	  // The +1 skips row 0 and column 0, which are never written.
	  const int row_ix = blockIdx.y * blockDim.y + threadIdx.y + 1;
	  const int col_ix = blockIdx.x * blockDim.x + threadIdx.x + 1;

	  // Threads mapped beyond the last interior row or column have nothing to do.
	  if (row_ix > u.m() - 2 || col_ix > u.n() - 2) {
	    return;
	  }

	  // Four-neighbour stencil. The float literal keeps the expression from
	  // being promoted to double; scaling by 0.25 is exact either way.
	  u(row_ix, col_ix) =
	      0.25f * (uo(row_ix - 1, col_ix) + uo(row_ix + 1, col_ix) +
	               uo(row_ix, col_ix - 1) + uo(row_ix, col_ix + 1) -
	               f(row_ix, col_ix) * h * h);

	  // Deliberately no __syncthreads() here: threads that returned early above
	  // would never reach it, and a block-level barrier at the very end of a
	  // kernel that shares no data between threads orders nothing.
	}

	/* -------------------------------------------------------------------------- */
	// Launches one update step on the GPU: reads m_grids.old() and writes
	// m_grids.current(), passing m_f and m_h_m through to the kernel.
	//
	// The kernel is selected at compile time: with PER_ROW defined the
	// one-thread-per-row kernel is launched, otherwise the one-thread-per-entry
	// kernel.
	//
	//   block_size - threads per block; x and y must both be non-zero
	//
	// Throws std::runtime_error if block_size has a zero extent in x or y, or if
	// the CUDA runtime reports an error for the launch. Only the launch itself
	// is checked: the function does not synchronize with the device, so faults
	// raised while the kernel runs surface at a later synchronizing call.
	void Simulation::compute_step(const dim3 block_size) {
	  // A zero extent would divide by zero in the grid-size computation below.
	  if (block_size.x == 0 || block_size.y == 0) {
	    throw std::runtime_error(
	        "Error Launching Kernel: block size must be non-zero in x and y");
	  }

	  Grid & u = m_grids.current();
	  Grid & uo = m_grids.old();

	  int m = u.m() - 2; // First & last rows don't need to be assigned to a thread
	  int n = u.n() - 2; // First & last column don't need to be assigned threads

	  // Ceil-division so the launch covers every interior row (and, per entry,
	  // every interior column) even when the block size does not divide evenly.
	  dim3 grid_size;
	#if defined(PER_ROW)
	  grid_size.x = 1; // the row kernel only indexes along y
	  grid_size.y = (m + block_size.y - 1)/block_size.y;
	#else
	  grid_size.x = (n + block_size.x - 1)/block_size.x;
	  grid_size.y = (m + block_size.y - 1)/block_size.y;
	#endif

	  // Report the launch configuration once, on the first call only.
	  static bool first{true};
	  if (first) {
	    std::cout << "Block size: " << block_size.x << ":" << block_size.y << "\n"
	              << "Grid_size: " << grid_size.x << ":" << grid_size.y << std::endl;
	    first = false;
	  }

	#if defined(PER_ROW)
	  compute_step_one_thread_per_row<<<grid_size, block_size>>>(uo, u, m_f, m_h_m);
	#else
	  compute_step_one_thread_per_entry<<<grid_size, block_size>>>(uo, u, m_f, m_h_m);
	#endif

	  // Kernel launches return no status; launch errors are fetched (and
	  // cleared) through cudaGetLastError().
	  auto error = cudaGetLastError();
	  if(error != cudaSuccess) {
	    throw std::runtime_error("Error Launching Kernel: "
	                             + std::string(cudaGetErrorName(error)) + " - "
	                             + std::string(cudaGetErrorString(error)));
	  }
	}

simulation_gpu_loud.cu
No OneTemporary
Actions

File Metadata

simulation_gpu_loud.cu
View Options

Event Timeline

simulation_gpu_loud.cuNo OneTemporaryActions

File Metadata

simulation_gpu_loud.cuView Options

Event Timeline

simulation_gpu_loud.cu
No OneTemporary
Actions

simulation_gpu_loud.cu
View Options