diff --git a/SConstruct b/SConstruct
index 1a4db53..3eb7a4d 100644
--- a/SConstruct
+++ b/SConstruct
@@ -1,240 +1,242 @@
 from __future__ import print_function
 import os
 from os.path import join, abspath
 from version import write_info_file
 
 
 def detect_fftw(env):
     """Detect fftw on clusters"""
     fftw_include = ""
     fftw_library = ""
 
     # If FFTW is provided by module system (on clusters)
     if 'FFTW_ROOT' in env['ENV']:
         fftw_include = join(env['ENV']['FFTW_ROOT'], 'include')
         fftw_library = join(env['ENV']['FFTW_ROOT'], 'lib')
 
     # Setting up FFTW
     env['FFTW_LIBRARY_WISH'] = ['main', 'omp']
     env['FFTW_INCLUDE_DIR'] = fftw_include
     env['FFTW_LIBRARY_DIR'] = fftw_library
     env.Tool(fftw)
 
 
 def detect_cuda(env):
     """Detect cuda on clusters"""
     if 'CUDA_ROOT' in env['ENV']:
         env['CUDA_TOOLKIT_PATH'] = env['ENV']['CUDA_ROOT']
     else:
         env['CUDA_TOOLKIT_PATH'] = '/opt/cuda'
 
     env['CUDA_COMPONENTS'] = ['cufft']
     env['CUDA_ARCH_FLAG'] = '-arch=sm_35'
 
     colors = env['COLOR_DICT']
     if not env['verbose']:
         env['NVCCCOMSTR'] = u'{0}[Compiling (cuda)] {1}$SOURCE{2}'.format(colors['green'],
                                                                           colors['blue'],
                                                                           colors['end'])
         env['SHLINKCOMSTR'] = u'{0}[Linking (cuda)] {1}$TARGET{2}'.format(colors['purple'],
                                                                           colors['blue'],
                                                                           colors['end'])
     env.AppendUnique(CXXFLAGS="-expt-extended-lambda")  # experimental lambda support
     env.AppendUnique(CXXFLAGS="-expt-relaxed-constexpr")  # experimental lambda support
+    if env['build_type'] == 'debug':
+        env.AppendUnique(CXXFLAGS="-G")  # nvcc: generate debug information for device code
     env.Tool('nvcc')
 
 
 def detect_boost(env):
     """Detect boost on clusters"""
     if 'BOOST_ROOT' in env['ENV']:
         env.AppendUnique(CPPPATH=[join(env['ENV']['BOOST_ROOT'], 'include')])
 
 
 def gen_print(action_string, color_string, env):
     """Generic function for creating pretty compile output"""
     if env['verbose']:
         return None
 
     def print_fun(command, target, source, env):
         colors = env['COLOR_DICT']
         print("{}[{}] {}{}{}".format(colors[color_string],
                                      action_string,
                                      colors['blue'],
                                      target[0],
                                      colors['end']))
     return print_fun
 
 
 # Compilation colors
 colors = {
   'cyan': '\033[96m',
   'purple': '\033[95m',
   'blue': '\033[94m',
   'green': '\033[92m',
   'yellow': '\033[93m',
   'red': '\033[91m',
   'end': '\033[0m'
 }
 
 # Inherit all environment variables (for CXX detection, etc.)
 main_env = Environment(ENV=os.environ)
 main_env['COLOR_DICT'] = colors
 
 # Compiler detection
 compiler_default = 'g++'
 if 'CXX' in os.environ:
     compiler_default = os.environ['CXX']
 
 # Build variables
 vars = Variables('build-setup.conf')
 vars.Add(EnumVariable('build_type', 'Build type', 'release',
                       allowed_values=('release', 'profiling', 'debug'),
                       ignorecase=2))
 vars.Add('prefix', 'Prefix where to install', '/usr/local')
 vars.Add('CXX', 'Compiler', compiler_default)
 vars.Add('py_exec', 'Python executable', 'python')
 vars.Add(BoolVariable('timer', 'Activate the timer possibilities', False))
 vars.Add(BoolVariable('verbose', 'Activate verbosity', False))
 vars.Add(BoolVariable('build_doc', 'Build documentation', False))
 vars.Add(BoolVariable('color', 'Color the non-verbose compilation output', False))
 vars.Add(BoolVariable('cuda', 'Activate the CUDA library', False))
 vars.Update(main_env)
 
 Help(vars.GenerateHelpText(main_env))
 
 # Save all options, not just those that differ from default
 with open('build-setup.conf', 'w') as setup:
     for key in vars.keys():
         setup.write("{} = '{}'\n".format(key, main_env[key]))
 
 build_type = main_env['build_type']
 build_dir = 'build-' + main_env['build_type']
 print("Building in " + build_dir)
 
 verbose = main_env['verbose']
 
 # Remove colors if not set
 if not main_env['color']:
     for key in colors:
         colors[key] = ''
 
 # Setting object suffix
 main_env['SHOBJSUFFIX'] = '.o'
 
 if not verbose:
     main_env['SHCXXCOMSTR'] = u'{0}[Compiling] {1}$SOURCE{2}'.format(colors['green'],
                                                                      colors['blue'],
                                                                      colors['end'])
     main_env['SHLINKCOMSTR'] = u'{0}[Linking] {1}$TARGET{2}'.format(colors['purple'],
                                                                     colors['blue'],
                                                                     colors['end'])
     main_env['SWIGCOMSTR'] = u'{0}[Swig] {1}$SOURCE{2}'.format(colors['yellow'],
                                                                colors['blue'],
                                                                colors['end'])
 
 # Include paths
 main_env.AppendUnique(CPPPATH=['#/src',
                                '#/src/core',
                                '#/src/bem',
                                '#/src/surface',
                                '#/src/python',
                                '#/src/percolation',
                                '#/src/model',
                                '#/src/solvers',
                                '#/src/gpu',
                                '#/python'])
 
 # Changing the shared object extension
 main_env['SHOBJSUFFIX'] = '.o'
 
 # Treating Intel compiler for OpenMP
 if main_env['CXX'] != 'icpc':
     omp_libs = ['gomp']
     omp_flag = '-fopenmp'
 else:
     omp_libs = ['']
     omp_flag = '-qopenmp'
 
 main_env.AppendUnique(LIBS=omp_libs)
 # main_env.AppendUnique(LINKFLAGS=[omp_flag])
 
 # Flags and options
 main_env.AppendUnique(CXXFLAGS=['-std=c++11',
                                 '-Wall',
                                 omp_flag])
 
 # Adding compile flags defined in evironment
 if 'CXXFLAGS' in os.environ:
     main_env.AppendUnique(CXXFLAGS=Split(os.environ['CXXFLAGS']))
 
 if main_env['timer']:
     main_env.AppendUnique(CPPDEFINES=['USING_TIMER'])
 if build_type == 'debug':
     main_env.AppendUnique(CPPDEFINES=['TAMAAS_DEBUG'])
 
 if main_env['cuda']:
     main_env.AppendUnique(CPPDEFINES=['USE_CUDA'])
 
 # Compilation flags
 cxxflags_dict = {
     "debug": Split("-g -O0"),
     "profiling": Split("-g -pg -O2"),
     "release": Split("-O3")
 }
 
 # Link flags for shared libs
 shlinkflags_dict = {
     "debug": [],
     "profiling": ['-pg'],
     "release": []
 }
 
 main_env.AppendUnique(CXXFLAGS=cxxflags_dict[build_type])
 main_env.AppendUnique(SHLINKFLAGS=shlinkflags_dict[build_type])
 
 main_env['LIBPATH'] = [abspath(join(build_dir, 'src'))]
 main_env['RPATH'] = "$LIBPATH"
 
 detect_fftw(main_env)
 detect_boost(main_env)
 
 # Activate cuda if needed
 if main_env['cuda']:
     detect_cuda(main_env)
 
 # Writing information file
 write_info_file("src/tamaas_info.cpp")
 
 # Saving the env file
 env_content = """export PYTHONPATH=$PYTHONPATH:{0}/python
 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{0}/src
 """
 
 
 def write_env_file(target, source, env):
     """Builder to write content to file"""
     with open(str(target[0]), 'w') as env_file:
         env_file.write(env_content.format(abspath(build_dir)))
 
 
 main_env['gen_print'] = gen_print
 env_file_env = main_env.Clone(PRINT_CMD_LINE_FUNC=gen_print("Writing", "cyan", main_env))
 # Need to have a command and manage tamaas_environement.sh as target because
 # the build directory does not always exist
 env_file_env.Command(join(build_dir, 'tamaas_environement.sh'), None, write_env_file)
 
 Export('main_env')
 
 
 # Building subdirs
 def subdir(dir): SConscript(join(dir, 'SConscript'),
                             variant_dir=join(build_dir, dir),
                             duplicate=True)
 
 
 for dir in ['src', 'python', 'tests']:
     subdir(dir)
 
 # Building documentation
 if main_env['build_doc']:
     subdir('doc')
diff --git a/src/core/loops/loop_utils.hh b/src/core/loops/loop_utils.hh
index dde0acd..7742891 100644
--- a/src/core/loops/loop_utils.hh
+++ b/src/core/loops/loop_utils.hh
@@ -1,106 +1,106 @@
 /**
  * @file
  *
  * @author Lucas Frérot <lucas.frerot@epfl.ch>
  *
  * @section LICENSE
  *
  * Copyright (©)  2017 EPFL  (Ecole Polytechnique  Fédérale de
  * Lausanne)  Laboratory (LSMS  -  Laboratoire de  Simulation  en Mécanique  des
  * Solides)
  *
  * Tamaas is free  software: you can redistribute it and/or  modify it under the
  * terms  of the  GNU Lesser  General Public  License as  published by  the Free
  * Software Foundation, either version 3 of the License, or (at your option) any
  * later version.
  *
  * Tamaas is  distributed in the  hope that it  will be useful, but  WITHOUT ANY
  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  * A  PARTICULAR PURPOSE. See  the GNU  Lesser General  Public License  for more
  * details.
  *
  * You should  have received  a copy  of the GNU  Lesser General  Public License
  * along with Tamaas. If not, see <http://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #ifndef __LOOP_UTILS_HH__
 #define __LOOP_UTILS_HH__
 /* -------------------------------------------------------------------------- */
 #include "tamaas.hh"
 #include "loop.hh"
 #include <type_traits>
 #include <limits>
 /* -------------------------------------------------------------------------- */
 
 __BEGIN_TAMAAS__
 
 namespace detail {
 
 template <bool only_points = false, typename... Grids>
-constexpr UInt loopSize(Grids&&... grids) {
+UInt loopSize(Grids&&... grids) {
   return (only_points)?
     std::get<0>(std::forward_as_tuple(grids...)).getNbPoints():
     std::get<0>(std::forward_as_tuple(grids...)).dataSize();
 }
 
 template <operation op>
 struct reduction_helper;
 
 template <>
 struct reduction_helper<operation::plus> {
   template<typename T>
   static constexpr T init() {
     return T(0);
   }
 
   template<typename T>
   static void reduce(T& res, T val) {
     res += val;
   }
 };
 
 template <>
 struct reduction_helper<operation::times> {
   template<typename T>
   static constexpr T init() {
     return T(1);
   }
 
   template<typename T>
   static void reduce(T& res, T val) {
     res *= val;
   }
 };
 
 template <>
 struct reduction_helper<operation::min> {
   template<typename T>
   static constexpr T init() {
     return std::numeric_limits<T>::max();
   }
 
   template<typename T>
   static void reduce(T& res, T val) {
     res = std::min(res, val);
   }
 };
 
 template <>
 struct reduction_helper<operation::max> {
   template<typename T>
   static constexpr T init() {
     return std::numeric_limits<T>::lowest();
   }
 
   template<typename T>
   static void reduce(T& res, T val) {
     res = std::max(res, val);
   }
 };
 
 }
 
 __END_TAMAAS__
 
 #endif // __LOOP_UTILS_HH
diff --git a/src/core/tamaas.cpp b/src/core/tamaas.cpp
index 705ab11..da5c1f4 100644
--- a/src/core/tamaas.cpp
+++ b/src/core/tamaas.cpp
@@ -1,58 +1,58 @@
 /**
  * @file
  *
  * @author Lucas Frérot <lucas.frerot@epfl.ch>
  *
  * @section LICENSE
  *
  * Copyright (©)  2016 EPFL  (Ecole Polytechnique  Fédérale de
  * Lausanne)  Laboratory (LSMS  -  Laboratoire de  Simulation  en Mécanique  des
  * Solides)
  *
  * Tamaas is free  software: you can redistribute it and/or  modify it under the
  * terms  of the  GNU Lesser  General Public  License as  published by  the Free
  * Software Foundation, either version 3 of the License, or (at your option) any
  * later version.
  *
  * Tamaas is  distributed in the  hope that it  will be useful, but  WITHOUT ANY
  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  * A  PARTICULAR PURPOSE. See  the GNU  Lesser General  Public License  for more
  * details.
  *
  * You should  have received  a copy  of the GNU  Lesser General  Public License
  * along with Tamaas. If not, see <http://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 
 #include <fftw3.h>
 #include <omp.h>
 #include "tamaas.hh"
 #include "fft_plan_manager.hh"
 #include "loop.hh"
 
 /* -------------------------------------------------------------------------- */
 
 __BEGIN_TAMAAS__
 
 void initialize(UInt num_threads) {
   fftw_init_threads();
 
   if (num_threads) omp_set_num_threads(num_threads);
   fftw_plan_with_nthreads(omp_get_max_threads());
 
 #ifdef USE_CUDA
-  Loop::init(Loop::omp); // for now
+  Loop::init(Loop::cuda); // for now
 #else
   Loop::init(Loop::omp);
 #endif
 }
 
 /* -------------------------------------------------------------------------- */
 
 void finalize() {
   FFTPlanManager::get().clean();
   fftw_cleanup_threads();
 }
 
 __END_TAMAAS__
diff --git a/src/gpu/loops/loop_cuda.hh b/src/gpu/loops/loop_cuda.hh
index 919926b..c8704ed 100644
--- a/src/gpu/loops/loop_cuda.hh
+++ b/src/gpu/loops/loop_cuda.hh
@@ -1,75 +1,81 @@
 /**
  * @file
  *
  * @author Lucas Frérot <lucas.frerot@epfl.ch>
  *
  * @section LICENSE
  *
  * Copyright (©)  2017 EPFL  (Ecole Polytechnique  Fédérale de
  * Lausanne)  Laboratory (LSMS  -  Laboratoire de  Simulation  en Mécanique  des
  * Solides)
  *
  * Tamaas is free  software: you can redistribute it and/or  modify it under the
  * terms  of the  GNU Lesser  General Public  License as  published by  the Free
  * Software Foundation, either version 3 of the License, or (at your option) any
  * later version.
  *
  * Tamaas is  distributed in the  hope that it  will be useful, but  WITHOUT ANY
  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  * A  PARTICULAR PURPOSE. See  the GNU  Lesser General  Public License  for more
  * details.
  *
  * You should  have received  a copy  of the GNU  Lesser General  Public License
  * along with Tamaas. If not, see <http://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #ifndef __LOOP_CUDA_HH__
 #define __LOOP_CUDA_HH__
 /* -------------------------------------------------------------------------- */
 #include "tamaas.hh"
 #include "loops/loop_utils.hh"
 #include <cuda_runtime_api.h>
 
 __BEGIN_TAMAAS__
 
 namespace detail {
 
 #define GRID_STRIDED_LOOP(operation, size) do {			\
   const UInt index = blockIdx.x * blockDim.x + threadIdx.x;	\
   const UInt stride = blockDim.x * gridDim.x;			\
   for (UInt i = index ; i < (size) ; i += stride) {		\
     (operation);						\
   }} while(0)							\
 
 template <bool strided, typename Functor, typename... T>
-__global__ void exec_loop_kernel(Functor& func, UInt size, T... data_pointers) {
-  GRID_STRIDED_LOOP(func(data_pointers[i]...), size);
+__global__ void exec_loop_kernel(Functor func, UInt size, T... data_pointers) {
+  // func is taken by value: a reference would hand the device the address of a host object
+  const UInt index = blockIdx.x * blockDim.x + threadIdx.x;
+  const UInt stride = blockDim.x * gridDim.x;
+  for (UInt i = index ; i < size ; i += stride) {
+    func(data_pointers[i]...);
+  }
 }
 
 template <bool strided, typename Functor, typename... Grids>
 void exec_loop_cuda(Functor&& func, Grids&&... grids) {
   Int device = 0, threads_per_block = 0;
   cudaGetDevice(&device);
   cudaDeviceGetAttribute(&threads_per_block, cudaDevAttrMaxThreadsPerBlock, device);
 
   const UInt loop_size = loopSize<strided>(grids...);
 
   UInt blocks = loop_size / threads_per_block;
   blocks += (threads_per_block * blocks == loop_size)? 0 : 1; // make sure we cover all data
 
-  exec_loop_kernel<strided><<<blocks, threads_per_block>>>(func, loop_size, grids.getInternalData()...);
+  //exec_loop_kernel<strided><<<blocks, threads_per_block>>>(func, loop_size, grids.getInternalData()...);
+  exec_loop_kernel<strided><<<1, 1>>>(func, loop_size, grids.getInternalData()...);
 }
 
 template <operation op, bool strided, typename Functor, typename... Grids>
 auto reduce_loop_cuda(Functor&& func, Grids&&... grids) -> decltype(func(grids(0)...)) {
   return func(grids(0)...);
 }
 
 #undef GRID_STRIDED_LOOP
 
 } // namespace detail
 
 __END_TAMAAS__
 
 #endif // __LOOP_CUDA_HH__
diff --git a/tests/SConscript b/tests/SConscript
index 005d35f..90c4048 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -1,50 +1,51 @@
 from __future__ import print_function
 from os.path import join, abspath
 
 
 Import('main_env')
 
 print("Environment for tests")
 test_env = main_env.Clone(
     PRINT_CMD_LINE_FUNC=main_env['gen_print']("Copying", "red", main_env))
 
 test_files = Split("""
 run_tests.sh
 test_hertz_pressure.py
 test_westergaard.py
 test_patch_westergaard.py
 test_surface.py
 test_autocorrelation.py
 test_hertz_disp.py
 test_hertz_kato.py
 test_hertz_adhesion.py
 test_saturated_pressure.py
 test_fftransform.py
 test_bem_grid.py
 test_flood_fill.py
 """)
 
 src_dir = "#/tests"
 build_dir = 'build-' + main_env['build_type'] + '/tests'
 
 for file in test_files:
     source = join(src_dir, file)
     test_env.Command(file, source, Copy("$TARGET", "$SOURCE"))
 
 crit_env = main_env.Clone(tools=[criterion])
 
 if 'SHCXXCOMSTR' in main_env:
     crit_env['CXXCOMSTR'] = main_env['SHCXXCOMSTR']
 if 'SHLINKCOMSTR' in main_env:
     crit_env['LINKCOMSTR'] = main_env['SHLINKCOMSTR']
 
 crit_env.AppendUnique(LIBS=['Tamaas'],
                       LIBPATH=[abspath('build-' + main_env['build_type'] + '/src')])
 
 cpp_test_files = Split("""
 test_grid.cpp
 test_loop.cpp
+test_cuda.cpp
 """)
 
 for file in cpp_test_files:
     crit_env.Program(file)
diff --git a/tests/test_cuda.cpp b/tests/test_cuda.cpp
new file mode 100644
index 0000000..3fc3c6e
--- /dev/null
+++ b/tests/test_cuda.cpp
@@ -0,0 +1,18 @@
+#include "tamaas.hh"
+#include "grid.hh"
+#include <iostream>
+
+using namespace tamaas;
+
+/// Smoke test: assign 1 to every entry of a grid through Loop::loop and print the grid
+int main() {
+  initialize();
+  Grid<Real, 1> grid({20}, 1);
+  // No assertion is made: the printed grid has to be inspected by hand
+  Loop::loop([] CUDA_LAMBDA (Real & x) {
+    x = 1;
+  }, grid);
+  std::cout << grid << std::endl;
+  finalize();
+  return 0;
+}