File Metadata

Created: Wed, Jul 10, 01:57

fft3d_cuda.cu
View Options

	/* ----------------------------------------------------------------------
	LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator

	Original Version:
	http://lammps.sandia.gov, Sandia National Laboratories
	Steve Plimpton, sjplimp@sandia.gov

	See the README file in the top-level LAMMPS directory.

	-----------------------------------------------------------------------

	USER-CUDA Package and associated modifications:
	https://sourceforge.net/projects/lammpscuda/

	Christian Trott, christian.trott@tu-ilmenau.de
	Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
	Theoretical Physics II, University of Technology Ilmenau, Germany

	See the README file in the USER-CUDA directory.

	This software is distributed under the GNU General Public License.
	------------------------------------------------------------------------- */

	//#define CUDA_PRECISION 1
	#include "cuda_precision.h"
	#include "cuda_common.h"
	/* One complex value: real part followed by imaginary part, both stored
	   as FFT_CFLOAT. The member order (re, then im) is kept as declared. */
	struct FFT_DATA {
	  FFT_CFLOAT re, im;
	};

	#include "fft3d_cuda_cu.h"
	#include "fft3d_cuda_kernel.cu"
	#include <stdio.h>

	/* ----------------------------------------------------------------------
	   Launch initfftdata_kernel over an (nslow x nmid) grid of blocks with
	   nfast threads per block, handing `in` and `out` to the kernel unchanged.

	   in     source buffer of doubles passed to the kernel
	   out    destination buffer of FFT_CFLOAT passed to the kernel
	   nfast  threads per block (threads.x)
	   nmid   grid extent in y
	   nslow  grid extent in x

	   The device is synchronized both before and after the launch, so this
	   call blocks until the kernel has finished.
	   NOTE(review): nfast is used directly as the block size, so it has to
	   stay within the device's threads-per-block limit - confirm at callers.
	------------------------------------------------------------------------- */
	void initfftdata(double* in, FFT_CFLOAT* out, int nfast, int nmid, int nslow)
	{
	  dim3 grid(nslow, nmid, 1);
	  dim3 threads(nfast, 1, 1);

	  // cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its
	  // direct replacement.
	  cudaDeviceSynchronize();
	  initfftdata_kernel <<< grid, threads, 0>>>(in, out);
	  cudaDeviceSynchronize();
	  MYDBG(printf("ERROR-CUDA initfftdata_kernel: %s\n", cudaGetErrorString(cudaGetLastError())));
	}


	/* ----------------------------------------------------------------------
	   Launch permute_kernel over an (nslow x nmid) grid of blocks with
	   2 * nfast threads per block.

	   in, out  FFT_DATA buffers, handed to the kernel as FFT_CFLOAT pointers
	   nfast    half the number of threads per block (threads.x = nfast * 2)
	   nmid     grid extent in y
	   nslow    grid extent in x

	   FFT_DATA holds two FFT_CFLOAT members (re, im), which is presumably why
	   the block size is doubled - confirm against permute_kernel.
	   The device is synchronized after the launch, so this call blocks.
	------------------------------------------------------------------------- */
	void permute(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow)
	{
	  dim3 grid(nslow, nmid, 1);
	  dim3 threads(nfast * 2, 1, 1);

	  // The buffers must be cast to FFT_CFLOAT* (pointer), not to the scalar
	  // FFT_CFLOAT: a pointer cannot be converted to a floating-point type.
	  permute_kernel <<< grid, threads, 0>>>(reinterpret_cast<FFT_CFLOAT*>(in),
	                                         reinterpret_cast<FFT_CFLOAT*>(out));
	  // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
	  cudaDeviceSynchronize();
	  MYDBG(printf("ERROR-CUDA permute_kernel: %s\n", cudaGetErrorString(cudaGetLastError())));
	}

	/* ----------------------------------------------------------------------
	   Launch permute_kernel over an (nslow x nmid) grid of blocks with
	   2 * nfast threads per block, then synchronize the device.

	   in, out  FFT_DATA buffers, handed to the kernel as FFT_CFLOAT pointers
	   nfast    half the number of threads per block (threads.x = nfast * 2)
	   nmid     grid extent in y
	   nslow    grid extent in x

	   NOTE(review): despite the name, this launches permute_kernel - the same
	   kernel permute() launches - and passes no scale factor. Confirm whether
	   a dedicated scaling kernel was intended here.
	------------------------------------------------------------------------- */
	void permute_scale(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow)
	{
	  dim3 grid(nslow, nmid, 1);
	  dim3 threads(nfast * 2, 1, 1);

	  // The buffers must be cast to FFT_CFLOAT* (pointer), not to the scalar
	  // FFT_CFLOAT: a pointer cannot be converted to a floating-point type.
	  permute_kernel <<< grid, threads, 0>>>(reinterpret_cast<FFT_CFLOAT*>(in),
	                                         reinterpret_cast<FFT_CFLOAT*>(out));
	  // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
	  cudaDeviceSynchronize();
	}
	/* ----------------------------------------------------------------------
	   Launch permute_part_kernel on the index sub-range [ilo..ihi] x
	   [jlo..jhi] x [klo..khi], then synchronize the device.

	   in, out            FFT_DATA buffers, handed to the kernel as FFT_CFLOAT
	                      pointers
	   nfast/nmid/nslow   forwarded to the kernel unchanged
	   ihi, ilo           inclusive bounds giving grid.x   = ihi - ilo + 1
	   jhi, jlo           inclusive bounds giving grid.y   = jhi - jlo + 1
	   khi, klo           inclusive bounds giving threads.x = 2 * (khi - klo + 1)

	   All six bounds are also forwarded to the kernel.
	   NOTE(review): each hi bound is assumed to be >= its lo bound; an empty
	   range would yield a zero-sized launch dimension - confirm at callers.
	------------------------------------------------------------------------- */
	void permute_part(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow, int ihi, int ilo, int jhi, int jlo, int khi, int klo)
	{
	  dim3 grid(ihi - ilo + 1, jhi - jlo + 1, 1);
	  dim3 threads((khi - klo + 1) * 2, 1, 1);

	  // The buffers must be cast to FFT_CFLOAT* (pointer), not to the scalar
	  // FFT_CFLOAT: a pointer cannot be converted to a floating-point type.
	  permute_part_kernel <<< grid, threads, 0>>>(reinterpret_cast<FFT_CFLOAT*>(in),
	                                              reinterpret_cast<FFT_CFLOAT*>(out),
	                                              nfast, nmid, nslow,
	                                              ihi, ilo, jhi, jlo, khi, klo);
	  // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
	  cudaDeviceSynchronize();
	}

	/* ----------------------------------------------------------------------
	   Block the calling host thread until the device has finished all
	   previously issued work. Takes no arguments and returns nothing; the
	   status returned by the runtime call is not inspected.
	------------------------------------------------------------------------- */
	void FFTsyncthreads()
	{
	  // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
	  cudaDeviceSynchronize();
	}

fft3d_cuda.cu
No OneTemporary
Actions

File Metadata

fft3d_cuda.cu
View Options

Event Timeline

fft3d_cuda.cuNo OneTemporaryActions

File Metadata

fft3d_cuda.cuView Options

Event Timeline

fft3d_cuda.cu
No OneTemporary
Actions

fft3d_cuda.cu
View Options