diff --git a/examples/hello_world/Makefile b/examples/hello_world/Makefile
new file mode 100644
index 0000000..fc0ffdf
--- /dev/null
+++ b/examples/hello_world/Makefile
@@ -0,0 +1,24 @@
+# Compiler and flags used for every example in this directory.
+F90=nvfortran
+F90FLAGS=-cuda
+
+# These targets do not name files; never let a stray file shadow them.
+.PHONY: all clean distclean
+
+all: hello_world_cpu.x hello_world_cuda.x mem_alloc.x
+
+hello_world_cuda.x: hello_world_cuda.f90
+hello_world_cpu.x: hello_world_cpu.f90
+mem_alloc.x: mem_alloc.f90
+
+# Compile and link a single-source example in one step.
+%.x: %.f90
+	$(F90) $(F90FLAGS) $< -o $@
+
+# -f: do not fail when no executable has been built yet.
+distclean: clean
+	rm -f *.x
+
+# Remove intermediate objects and Fortran module files.
+clean:
+	rm -f *.o *.mod
diff --git a/examples/hello_world/hello_world_cpu.f90 b/examples/hello_world/hello_world_cpu.f90
new file mode 100644
index 0000000..27e3550
--- /dev/null
+++ b/examples/hello_world/hello_world_cpu.f90
@@ -0,0 +1,20 @@
+!> Host-only hello world example.
+module helloWorld
+  implicit none
+
+contains
+
+  !> Print a greeting on the default output unit.
+  subroutine hello_world_cpu
+    write(*,*) 'hello world'
+  end subroutine hello_world_cpu
+end module helloWorld
+
+!> Driver: print the greeting once.
+program testHelloWorld
+  use helloWorld, only: hello_world_cpu
+
+  implicit none
+
+  call hello_world_cpu
+end program testHelloWorld
diff --git a/examples/hello_world/hello_world_cuda.f90 b/examples/hello_world/hello_world_cuda.f90
new file mode 100644
index 0000000..c77a1c2
--- /dev/null
+++ b/examples/hello_world/hello_world_cuda.f90
@@ -0,0 +1,54 @@
+!> Hello-world procedures for the host and for the device.
+module helloWorld
+  implicit none
+
+contains
+
+  !> Host greeting, printed on the default output unit.
+  subroutine hello_world_cpu
+    write(*,*) 'hello world from CPU code'
+  end subroutine hello_world_cpu
+
+  !> Kernel: prints a fixed greeting.
+  attributes(global) subroutine hello_world_cuda
+    write(*,*) 'hello world from CUDA code'
+  end subroutine hello_world_cuda
+
+  !> Kernel: prints a greeting followed by the caller's threadIdx%x.
+  attributes(global) subroutine hello_world_cuda_threads
+    write(*,*) 'hello world from thread', threadIdx%x
+  end subroutine hello_world_cuda_threads
+end module helloWorld
+
+!> Driver: greet from the host, then launch each kernel and wait for it.
+program testHelloWorld
+  use cudafor
+  use helloWorld
+
+  implicit none
+
+  integer :: istat
+
+  call hello_world_cpu
+
+  ! Wait for each kernel and report any error instead of ignoring istat.
+  call hello_world_cuda<<<1, 1>>>
+  istat = cudaDeviceSynchronize()
+  call check_cuda(istat)
+
+  call hello_world_cuda_threads<<<1, 8>>>
+  istat = cudaDeviceSynchronize()
+  call check_cuda(istat)
+
+contains
+
+  !> Stop with the CUDA error text when a runtime call did not succeed.
+  subroutine check_cuda(stat)
+    integer, intent(in) :: stat
+
+    if (stat /= cudaSuccess) then
+      write(*,*) 'CUDA error: ', cudaGetErrorString(stat)
+      error stop 1
+    end if
+  end subroutine check_cuda
+end program testHelloWorld
diff --git 
a/examples/hello_world/mem_alloc.f90 b/examples/hello_world/mem_alloc.f90
new file mode 100644
index 0000000..2d9c3c9
--- /dev/null
+++ b/examples/hello_world/mem_alloc.f90
@@ -0,0 +1,55 @@
+!> Managed-memory demo: fill an array on the host, update it in a kernel.
+module memoryTest
+  implicit none
+
+contains
+
+  !> Print every element of `a` from host code.
+  subroutine print_cpu(a)
+    real, intent(in) :: a(:)
+
+    write(*,*) a
+  end subroutine print_cpu
+
+  !> Kernel: add 10.0 to every element of `a`.
+  attributes(global) subroutine modify_from_gpu(a)
+    ! inout, not out: the old values are read before being overwritten.
+    real, intent(inout) :: a(:)
+
+    a = a + 10.0
+  end subroutine modify_from_gpu
+end module memoryTest
+
+!> Driver: print the array before and after the kernel has modified it.
+program testMemory
+  use cudafor
+  use memoryTest
+
+  implicit none
+
+  integer, parameter :: n = 8
+  integer :: istat, i
+  real, managed, allocatable :: a(:)
+
+  allocate(a(n))
+
+  do i = 1, n
+    a(i) = i * 0.124
+  end do
+
+  write(*,*) 'Before GPU modification'
+  call print_cpu(a)
+
+  ! Wait for the kernel, and stop on failure, before the host reads `a`.
+  call modify_from_gpu<<<1, 1>>>(a)
+  istat = cudaDeviceSynchronize()
+  if (istat /= cudaSuccess) then
+    write(*,*) 'CUDA error: ', cudaGetErrorString(istat)
+    error stop 1
+  end if
+
+  write(*,*) 'After GPU modification'
+  call print_cpu(a)
+
+  deallocate(a)
+end program testMemory
diff --git a/examples/hello_world/script.sh b/examples/hello_world/script.sh
new file mode 100644
index 0000000..0188c13
--- /dev/null
+++ b/examples/hello_world/script.sh
@@ -0,0 +1,17 @@
+#!/bin/bash -l
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --ntasks-per-core=1
+#SBATCH --cpus-per-task=1
+#SBATCH --gres=gpu:1
+#SBATCH --reservation=spc-cuda-training-12.04
+#SBATCH --account=spc-cuda-training
+#SBATCH --time=0:05:00
+
+module load nvhpc
+
+# Run each example once; every output file is named after its executable.
+srun -n 1 ./hello_world_cpu.x > output_hello_world_cpu
+srun -n 1 ./hello_world_cuda.x > output_hello_world_cuda
+srun -n 1 ./mem_alloc.x > output_mem_alloc
+