diff --git a/Makefile b/Makefile index e1adf03..7b3cf52 100644 --- a/Makefile +++ b/Makefile @@ -1,27 +1,27 @@ EXEC=gbs_gradients # Excutable name # on izar -#FC=gfortran # GNU Fortran compiler -FC=nvfortran # NVHPC Fortran compiler +FC=gfortran # GNU Fortran compiler +#FC=nvfortran # NVHPC Fortran compiler # #FLAGS= -O3 -fopenmp -foffload=-lgfortran -lm -foffload="-lm" # options for OpenMP with GNU -FLAGS= -O3 -static-nvidia -mp=gpu -gpu=cc70 -Minfo=mp # options for openMP with NVHPC +#FLAGS= -O3 -static-nvidia -mp=gpu -gpu=cc70 -Minfo=mp # options for openMP with NVHPC #FLAGS=-acc -gpu=cc70 # options for openACC with NVHPC # CUDAC=nvcc # CUDA C compiler CUDAFLAGS= -O3 # options for CUDA C CUDA=CUDA # all: $(EXEC) gbs_gradients: test_gbs_gradients.F90 gradients_mod.F90 space_grid_mod.F90 prec_const_mod.F90 $(FC) $(FLAGS) prec_const_mod.F90 space_grid_mod.F90 gradients_mod.F90 test_gbs_gradients.F90 -o $(EXEC) gbs_gradients_cuda: fortranfiles.o cudafiles.o fortranmain.o $(CUDAC) -o $(EXEC) $(CUDAFLAGS) -D$(CUDA) -lgfortran -lcuda test_gbs_gradients.o gradients_mod.o gradients_cuda_mod.o gradients_fd4.o space_grid_mod.o prec_const_mod.o cuda_memory.o fortranmain.o: test_gbs_gradients.F90 $(FC) -c $(FLAGS) -D$(CUDA) test_gbs_gradients.F90 fortranfiles.o: gradients_mod.F90 space_grid_mod.F90 prec_const_mod.F90 - $(FC) -c $(FLAGS) -D$(CUDA) prec_const_mod.F90 space_grid_mod.F90 gradients_cuda_mod.F90 gradients_mod.F90 + $(FC) -c $(FLAGS) -D$(CUDA) prec_const_mod.F90 space_grid_mod.F90 gradients_mod.F90 gradients_cuda_mod.F90 cudafiles.o: cuda_memory.cu $(CUDAC) $(CUDAFLAGS) -c cuda_memory.cu gradients_fd4.cu clean: rm -f gbs_gradients gbs_gradients_cuda *.mod *.o *~ diff --git a/space_grid_mod.F90 b/space_grid_mod.F90 index 6a50140..b04d297 100644 --- a/space_grid_mod.F90 +++ b/space_grid_mod.F90 @@ -1,167 +1,167 @@ !> @brief Grid module for spatial discretization module space_grid use prec_const implicit none ! Start and end ghost cells in local and global grids integer, public :: izs=3,iys=3,ixs=3 ! start index integer, public :: ixe,iye,ize ! end index integer, public :: nxGhost=2,nyGhost=2,nzGhost=2 ! size of ghost boundary integer, public :: ixsg, ixeg ! start/end indexes for x integer, public :: iysg, iyeg ! start/end indexes for y integer, public :: izsg, izeg ! start/end indexes for z real(dp), public :: deltaxi=0.005,deltayi=0.003,deltazi=0.03 ! space step interface gbs_allocate module procedure gbs_allocate_dp1,gbs_allocate_dp2,gbs_allocate_dp3,gbs_allocate_dp4 end interface gbs_allocate #ifdef CUDA interface gbs_allocate_cuda module procedure gbs_allocate_cuda_dp4, gbs_allocate_cuda_dp3, gbs_allocate_cuda_dp2, gbs_allocate_cuda_dp1 end interface gbs_allocate_cuda interface function allocate_cuda_memory(n) bind(c, name='allocate_cuda_memory') use iso_c_binding type(c_ptr) :: allocate_cuda_memory integer :: n end function allocate_cuda_memory end interface #endif contains subroutine compute_mesh(nx,ny,nz) integer, intent(in) :: nx,ny,nz ixsg=ixs-nxGhost iysg=iys-nyGhost izsg=izs-nzGhost ! ixe=ixs+nx-1 iye=iys+ny-1 ize=izs+nz-1 ! ixeg=ixe+nxGhost iyeg=iye+nyGhost izeg=ize+nzGhost end subroutine compute_mesh subroutine init_array(a) real(dp), dimension(iysg:iyeg,ixsg:ixeg,izsg:izeg), intent(inout) :: a integer :: i,j,k do k=izsg,izeg do i=ixsg,ixeg do j=iysg,iyeg a(j, i, k) = (dble(i)*3.2+dble(j)*2.1+dble(k)*45.12) end do end do end do end subroutine init_array subroutine init_array2d(a) real(dp), dimension(iysg:iyeg,ixsg:ixeg), intent(inout) :: a integer :: i,j do i=ixsg,ixeg do j=iysg,iyeg a(j, i) = (dble(i)*3.1+dble(j)*2.1) end do end do end subroutine init_array2d !> @brief Wrapper routine to allocate and initialize 1D double array subroutine gbs_allocate_dp1(a,is1,ie1) real(dp), dimension(:), allocatable, intent(inout) :: a !< Input array integer, intent(in) :: is1,ie1 !< Starting and ending indices allocate(a(is1:ie1)) a = 0.0_dp end subroutine gbs_allocate_dp1 !> @brief Wrapper routine to allocate and initialize 2D double array subroutine gbs_allocate_dp2(a,is1,ie1,is2,ie2) real(dp), dimension(:,:), allocatable, intent(inout) :: a !< Input array integer, intent(in) :: is1,ie1,is2,ie2 !< Starting and ending indices allocate(a(is1:ie1,is2:ie2)) a = 0.0_dp end subroutine gbs_allocate_dp2 !> @brief Wrapper routine to allocate and initialize 3D double array subroutine gbs_allocate_dp3(a,is1,ie1,is2,ie2,is3,ie3) real(dp), dimension(:,:,:), allocatable, intent(inout) :: a !< Input array integer, intent(in) :: is1,ie1,is2,ie2,is3,ie3 !< Starting and ending indices allocate(a(is1:ie1,is2:ie2,is3:ie3)) a = 0.0_dp end subroutine gbs_allocate_dp3 !> @brief Wrapper routine to allocate and initialize 4D double array subroutine gbs_allocate_dp4(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4) real(dp), dimension(:,:,:,:), allocatable, intent(inout) :: a !< Input array integer, intent(in) :: is1,ie1,is2,ie2,is3,ie3,is4,ie4 !< Starting and ending indices allocate(a(is1:ie1,is2:ie2,is3:ie3,is4:ie4)) a = 0.0_dp end subroutine gbs_allocate_dp4 #ifdef CUDA subroutine gbs_allocate_cuda_dp4(a,is1,ie1,is2,ie2,is3,ie3,is4,ie4) use iso_c_binding real(dp), dimension(:,:,:,:), pointer, intent(inout) :: a !< Input array integer, intent(in) :: is1,ie1,is2,ie2,is3,ie3,is4,ie4 !< Starting and ending indices integer ndata ndata=(ie4-is4+1)*(ie3-is3+1)*(ie2-is2+1)*(ie1-is1+1) - call c_f_pointer(allocate_cuda_memory(ndata), a, \ + call c_f_pointer(allocate_cuda_memory(ndata), a, & [(ie1-is1+1),(ie2-is2+1),(ie3-is3+1),(ie4-is4+1)]) a(is1:,is2:,is3:,is4:) => a a=0.0_dp end subroutine gbs_allocate_cuda_dp4 subroutine gbs_allocate_cuda_dp3(a,is1,ie1,is2,ie2,is3,ie3) use iso_c_binding real(dp), dimension(:,:,:), pointer, intent(inout) :: a integer, intent(in) :: is1,ie1,is2,ie2,is3,ie3 integer ndata ndata=(ie3-is3+1)*(ie2-is2+1)*(ie1-is1+1) - call c_f_pointer(allocate_cuda_memory(ndata), a, \ + call c_f_pointer(allocate_cuda_memory(ndata), a, & [(ie1-is1+1),(ie2-is2+1),(ie3-is3+1)]) a(is1:,is2:,is3:) => a a=0.0_dp end subroutine gbs_allocate_cuda_dp3 subroutine gbs_allocate_cuda_dp2(a,is1,ie1,is2,ie2) use iso_c_binding real(dp), dimension(:,:), pointer, intent(inout) :: a integer, intent(in) :: is1,ie1,is2,ie2 integer ndata ndata=(ie2-is2+1)*(ie1-is1+1) - call c_f_pointer(allocate_cuda_memory(ndata), a, \ + call c_f_pointer(allocate_cuda_memory(ndata), a, & [(ie1-is1+1),(ie2-is2+1)]) a(is1:,is2:) => a a=0.0_dp end subroutine gbs_allocate_cuda_dp2 subroutine gbs_allocate_cuda_dp1(a,is1,ie1) use iso_c_binding real(dp), dimension(:), pointer, intent(inout) :: a integer, intent(in) :: is1,ie1 integer ndata ndata=(ie1-is1+1) - call c_f_pointer(allocate_cuda_memory(ndata), a, \ + call c_f_pointer(allocate_cuda_memory(ndata), a, & [(ie1-is1+1)]) a(is1:) => a a=0.0_dp end subroutine gbs_allocate_cuda_dp1 #endif end module space_grid