diff --git a/examples/hello_world/Makefile b/examples/hello_world/Makefile
index fc0ffdf..2dc6ea6 100644
--- a/examples/hello_world/Makefile
+++ b/examples/hello_world/Makefile
@@ -1,19 +1,16 @@
 F90=nvfortran
 F90FLAGS=-cuda
 
 .PHONY: clean distclean
 
 all: hello_world_cpu.x hello_world_cuda.x mem_alloc.x
 
 hello_world_cuda.x: hello_world_cuda.f90
 hello_world_cpu.x: hello_world_cpu.f90
 mem_alloc.x: mem_alloc.f90
 
 %.x: %.f90
 	$(F90) $(F90FLAGS) $< -o $@
 
-distclean: clean
-	rm *.x
-
 clean:
-	rm -f *.o *.mod
+	rm -f *.o *.mod *.x
diff --git a/examples/hello_world/README b/examples/hello_world/README
new file mode 100644
index 0000000..5e1c6f1
--- /dev/null
+++ b/examples/hello_world/README
@@ -0,0 +1,5 @@
+This directory contains the warmup examples you can submit to follow the lecture.
+The directory contains: hello_world, memory allocation (mem_alloc)
+Commands to be executed:
+$ source source_file
+$ sbatch script.sh
diff --git a/examples/hello_world/source_file b/examples/hello_world/source_file
new file mode 100644
index 0000000..9030d0f
--- /dev/null
+++ b/examples/hello_world/source_file
@@ -0,0 +1,2 @@
+#!/bin/bash
+module load nvhpc
diff --git a/examples/warmup/script.sh b/examples/warmup/script.sh
new file mode 100644
index 0000000..006f956
--- /dev/null
+++ b/examples/warmup/script.sh
@@ -0,0 +1,13 @@
+#!/bin/bash -l
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --ntasks-per-core=1
+#SBATCH --cpus-per-task=1
+#SBATCH --gres=gpu:1
+#SBATCH --reservation=spc-cuda-training-12.04
+#SBATCH --account=spc-cuda-training
+#SBATCH --time=0:05:00
+
+module load nvhpc
+
+nvidia-smi -a > output
diff --git a/exercises/ex1/hello_world.f90 b/exercises/ex1/hello_world.f90
index 17cd66a..070bae6 100644
--- a/exercises/ex1/hello_world.f90
+++ b/exercises/ex1/hello_world.f90
@@ -1,30 +1,29 @@
 module helloWorld
   implicit none
 contains
   subroutine hello_world_cpu
     write(*,*) 'hello world from CPU code'
   end subroutine hello_world_cpu
 
-  attributes(global) subroutine hello_world_cuda
-    write(*,*) 'hello world from CUDA code'
-  end subroutine hello_world_cuda
 
   attributes(global) subroutine hello_world_cuda_threads
-    write(*,*) 'hello world from thread', threadIdx%x
+    integer :: tid
+    tid = threadIdx%x
+    write(*,*) 'hello world from thread', tid
   end subroutine hello_world_cuda_threads
 end module helloWorld
 
 program testHelloWorld
   use cudafor
   use helloWorld
   implicit none
   integer :: istat
 
   call hello_world_cpu
-  call hello_world_cuda<<<1, 1>>>
+  call hello_world_cuda_threads<<<1, 1>>>
   istat = cudaDeviceSynchronize()
 end program testHelloWorld
diff --git a/exercises/ex2/Makefile b/exercises/ex2/Makefile
new file mode 100644
index 0000000..c6c1368
--- /dev/null
+++ b/exercises/ex2/Makefile
@@ -0,0 +1,17 @@
+F90=nvfortran
+F90FLAGS=-cuda
+
+.PHONY: clean distclean
+
+all: hello_world.x
+
+hello_world.x: hello_world.f90
+
+%.x: %.f90
+	$(F90) $(F90FLAGS) $< -o $@
+
+distclean: clean
+	rm *.x
+
+clean:
+	rm -f *.o *.mod
diff --git a/exercises/ex1/hello_world.f90 b/exercises/ex2/hello_world.f90
similarity index 67%
copy from exercises/ex1/hello_world.f90
copy to exercises/ex2/hello_world.f90
index 17cd66a..d9b56cc 100644
--- a/exercises/ex1/hello_world.f90
+++ b/exercises/ex2/hello_world.f90
@@ -1,30 +1,29 @@
 module helloWorld
   implicit none
 contains
   subroutine hello_world_cpu
     write(*,*) 'hello world from CPU code'
   end subroutine hello_world_cpu
 
-  attributes(global) subroutine hello_world_cuda
-    write(*,*) 'hello world from CUDA code'
-  end subroutine hello_world_cuda
 
   attributes(global) subroutine hello_world_cuda_threads
-    write(*,*) 'hello world from thread', threadIdx%x
+    integer :: tid
+    tid = ... ! compute the global thread index here
+    write(*,*) 'hello world from thread', tid
   end subroutine hello_world_cuda_threads
 end module helloWorld
 
 program testHelloWorld
   use cudafor
   use helloWorld
   implicit none
   integer :: istat
 
   call hello_world_cpu
-  call hello_world_cuda<<<1, 1>>>
+  call hello_world_cuda_threads<<<4, 4>>>
   istat = cudaDeviceSynchronize()
 end program testHelloWorld
diff --git a/exercises/ex2/script.sh b/exercises/ex2/script.sh
new file mode 100644
index 0000000..a42041e
--- /dev/null
+++ b/exercises/ex2/script.sh
@@ -0,0 +1,14 @@
+#!/bin/bash -l
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --ntasks-per-core=1
+#SBATCH --cpus-per-task=1
+#SBATCH --gres=gpu:1
+#SBATCH --reservation=spc-cuda-training-12.04
+#SBATCH --account=spc-cuda-training
+#SBATCH --time=0:05:00
+
+module load nvhpc
+
+srun -n 1 ./hello_world.x > output_hello_world
+
diff --git a/exercises/ex3/Makefile b/exercises/ex3/Makefile
new file mode 100644
index 0000000..b805b7f
--- /dev/null
+++ b/exercises/ex3/Makefile
@@ -0,0 +1,13 @@
+F90=nvfortran
+F90FLAGS=-Mcuda
+
+all: saxpy_cuda_256elements.x
+
+
+saxpy_cuda_256elements.x: saxpy_cuda_256elements.o
+	$(F90) $(F90FLAGS) saxpy_cuda_256elements.o -o $@
+saxpy_cuda_256elements.o: saxpy_cuda_256elements.F90
+	$(F90) $(F90FLAGS) -c saxpy_cuda_256elements.F90 -o $@
+
+clean:
+	rm *.x *.o
diff --git a/exercises/ex3/saxpy_cuda_256elements.F90 b/exercises/ex3/saxpy_cuda_256elements.F90
new file mode 100644
index 0000000..a894cd0
--- /dev/null
+++ b/exercises/ex3/saxpy_cuda_256elements.F90
@@ -0,0 +1,48 @@
+module mathOps
+contains
+  attributes(global) subroutine saxpy_cuda(x, y, a)
+    implicit none
+    real :: x(:), y(:)
+    real, value :: a
+    integer :: i, n
+    n = size(x)
+    !implement saxpy on GPU here
+  end subroutine saxpy_cuda
+
+  subroutine saxpy_cpu(x, y, a)
+    implicit none
+    real :: x(:), y(:)
+    real, value :: a
+    integer :: i, n
+    n = size(x)
+    do i=1, n
+      y(i) = y(i) + a*x(i)
+    end do
+  end subroutine saxpy_cpu
+
+end module mathOps
+
+program testSaxpy
+  use mathOps
+  use cudafor
+  implicit none
+  integer, parameter :: N = 256
+  integer istat, i
+  real :: x(N), y(N), a, norm2_cpu, norm2_gpu
+  real, device :: x_d(N), y_d(N)
+
+
+  do i=1, N
+    y(i) = 0.0
+    x(i) = i*0.458
+    a = 12.1
+  end do
+
+  x_d = x
+  y_d = y
+  !call the GPU implementation here
+  !y = y_d
+  istat = cudaDeviceSynchronize()
+  call saxpy_cpu(x, y, a)
+  write(*,*) 'L2 norm: ', norm2(y-y_d)
+end program testSaxpy
diff --git a/exercises/ex3/script.sh b/exercises/ex3/script.sh
new file mode 100644
index 0000000..a42041e
--- /dev/null
+++ b/exercises/ex3/script.sh
@@ -0,0 +1,14 @@
+#!/bin/bash -l
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --ntasks-per-core=1
+#SBATCH --cpus-per-task=1
+#SBATCH --gres=gpu:1
+#SBATCH --reservation=spc-cuda-training-12.04
+#SBATCH --account=spc-cuda-training
+#SBATCH --time=0:05:00
+
+module load nvhpc
+
+srun -n 1 ./saxpy_cuda_256elements.x > output_saxpy_cuda_256elements
+
diff --git a/exercises/ex4/Makefile b/exercises/ex4/Makefile
new file mode 100644
index 0000000..897d486
--- /dev/null
+++ b/exercises/ex4/Makefile
@@ -0,0 +1,13 @@
+F90=nvfortran
+F90FLAGS=-Mcuda
+
+all: saxpy_cuda.x
+
+
+saxpy_cuda.x: saxpy_cuda.o
+	$(F90) $(F90FLAGS) saxpy_cuda.o -o $@
+saxpy_cuda.o: saxpy_cuda.F90
+	$(F90) $(F90FLAGS) -c saxpy_cuda.F90 -o $@
+
+clean:
+	rm *.x *.o
diff --git a/exercises/ex4/saxpy_cuda.F90 b/exercises/ex4/saxpy_cuda.F90
new file mode 100644
index 0000000..523b490
--- /dev/null
+++ b/exercises/ex4/saxpy_cuda.F90
@@ -0,0 +1,48 @@
+module mathOps
+contains
+  attributes(global) subroutine saxpy_cuda(x, y, a)
+    implicit none
+    real :: x(:), y(:)
+    real, value :: a
+    integer :: i, n
+    n = size(x)
+    !implement saxpy on GPU here
+  end subroutine saxpy_cuda
+
+  subroutine saxpy_cpu(x, y, a)
+    implicit none
+    real :: x(:), y(:)
+    real, value :: a
+    integer :: i, n
+    n = size(x)
+    do i=1, n
+      y(i) = y(i) + a*x(i)
+    end do
+  end subroutine saxpy_cpu
+
+end module mathOps
+
+program testSaxpy
+  use mathOps
+  use cudafor
+  implicit none
+  integer, parameter :: N = 4000
+  integer istat, i
+  real :: x(N), y(N), a, norm2_cpu, norm2_gpu
+  real, device :: x_d(N), y_d(N)
+
+
+  do i=1, N
+    y(i) = 0.0
+    x(i) = i*0.458
+    a = 12.1
+  end do
+
+  x_d = x
+  y_d = y
+  !call the GPU implementation here
+  !y = y_d
+  istat = cudaDeviceSynchronize()
+  call saxpy_cpu(x, y, a)
+  write(*,*) 'L2 norm: ', norm2(y-y_d)
+end program testSaxpy
diff --git a/exercises/ex4/script.sh b/exercises/ex4/script.sh
new file mode 100644
index 0000000..4f9804a
--- /dev/null
+++ b/exercises/ex4/script.sh
@@ -0,0 +1,14 @@
+#!/bin/bash -l
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --ntasks-per-core=1
+#SBATCH --cpus-per-task=1
+#SBATCH --gres=gpu:1
+#SBATCH --reservation=spc-cuda-training-12.04
+#SBATCH --account=spc-cuda-training
+#SBATCH --time=0:05:00
+
+module load nvhpc
+
+srun -n 1 ./saxpy_cuda.x > output_saxpy_cuda
+
diff --git a/exercises/ex5/Makefile b/exercises/ex5/Makefile
new file mode 100644
index 0000000..897d486
--- /dev/null
+++ b/exercises/ex5/Makefile
@@ -0,0 +1,13 @@
+F90=nvfortran
+F90FLAGS=-Mcuda
+
+all: saxpy_cuda.x
+
+
+saxpy_cuda.x: saxpy_cuda.o
+	$(F90) $(F90FLAGS) saxpy_cuda.o -o $@
+saxpy_cuda.o: saxpy_cuda.F90
+	$(F90) $(F90FLAGS) -c saxpy_cuda.F90 -o $@
+
+clean:
+	rm *.x *.o
diff --git a/exercises/ex5/saxpy_cuda.F90 b/exercises/ex5/saxpy_cuda.F90
new file mode 100644
index 0000000..523b490
--- /dev/null
+++ b/exercises/ex5/saxpy_cuda.F90
@@ -0,0 +1,48 @@
+module mathOps
+contains
+  attributes(global) subroutine saxpy_cuda(x, y, a)
+    implicit none
+    real :: x(:), y(:)
+    real, value :: a
+    integer :: i, n
+    n = size(x)
+    !implement saxpy on GPU here
+  end subroutine saxpy_cuda
+
+  subroutine saxpy_cpu(x, y, a)
+    implicit none
+    real :: x(:), y(:)
+    real, value :: a
+    integer :: i, n
+    n = size(x)
+    do i=1, n
+      y(i) = y(i) + a*x(i)
+    end do
+  end subroutine saxpy_cpu
+
+end module mathOps
+
+program testSaxpy
+  use mathOps
+  use cudafor
+  implicit none
+  integer, parameter :: N = 4000
+  integer istat, i
+  real :: x(N), y(N), a, norm2_cpu, norm2_gpu
+  real, device :: x_d(N), y_d(N)
+
+
+  do i=1, N
+    y(i) = 0.0
+    x(i) = i*0.458
+    a = 12.1
+  end do
+
+  x_d = x
+  y_d = y
+  !call the GPU implementation here
+  !y = y_d
+  istat = cudaDeviceSynchronize()
+  call saxpy_cpu(x, y, a)
+  write(*,*) 'L2 norm: ', norm2(y-y_d)
+end program testSaxpy
diff --git a/exercises/ex5/script.sh b/exercises/ex5/script.sh
new file mode 100644
index 0000000..4f9804a
--- /dev/null
+++ b/exercises/ex5/script.sh
@@ -0,0 +1,14 @@
+#!/bin/bash -l
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --ntasks-per-core=1
+#SBATCH --cpus-per-task=1
+#SBATCH --gres=gpu:1
+#SBATCH --reservation=spc-cuda-training-12.04
+#SBATCH --account=spc-cuda-training
+#SBATCH --time=0:05:00
+
+module load nvhpc
+
+srun -n 1 ./saxpy_cuda.x > output_saxpy_cuda
+
diff --git a/notes b/notes
new file mode 100644
index 0000000..34aea3e
--- /dev/null
+++ b/notes
@@ -0,0 +1 @@
+-Load the compiler module with the command: module load nvhpc
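
Note on exercise 2: the kernel in exercises/ex2/hello_world.f90 is launched as <<<4, 4>>> (4 blocks of 4 threads), so threadIdx%x alone no longer identifies a thread uniquely and the line tid = ... is left for the student. For reference, a minimal sketch of one common way to form a 1-based global thread index in CUDA Fortran (this sketch is not part of the repository):

  attributes(global) subroutine hello_world_cuda_threads
    integer :: tid
    ! combine the thread index within the block with the block offset
    tid = threadIdx%x + (blockIdx%x - 1) * blockDim%x
    write(*,*) 'hello world from thread', tid
  end subroutine hello_world_cuda_threads

With a <<<4, 4>>> launch this yields tid values 1 through 16, printed in no guaranteed order.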
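Note on exercises 3-5: the body of saxpy_cuda and its launch are left for the student (the placeholders !implement saxpy on GPU here and !call the GPU implementation here). A minimal sketch of the usual pattern, assuming a 1-D launch whose total thread count is at least N (the 256-threads-per-block figure below is only an illustrative choice):

  attributes(global) subroutine saxpy_cuda(x, y, a)
    implicit none
    real :: x(:), y(:)
    real, value :: a
    integer :: i, n
    n = size(x)
    ! one array element per thread, with a guard for threads beyond n
    i = threadIdx%x + (blockIdx%x - 1) * blockDim%x
    if (i <= n) y(i) = y(i) + a * x(i)
  end subroutine saxpy_cuda

called from the host with, for example,

  call saxpy_cuda<<<(N + 255) / 256, 256>>>(x_d, y_d, a)

After cudaDeviceSynchronize(), copy y_d back to a host array before comparing it with the CPU result.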