diff --git a/SConstruct b/SConstruct
index 4487c48..b1dad1e 100644
--- a/SConstruct
+++ b/SConstruct
@@ -1,258 +1,258 @@
 from __future__ import print_function
 import os
 from os.path import join, abspath
 from version import write_info_file
 
 
 def detect_fftw(env):
     """Configure and load the FFTW tool.
 
     If ``FFTW_ROOT`` is present in the inherited shell environment (e.g.
     exported by a module system on a cluster), its ``include`` and ``lib``
     sub-directories are passed on as hints; otherwise both hints are left
     as empty strings.
     """
     shell_env = env['ENV']
     if 'FFTW_ROOT' in shell_env:
         root = shell_env['FFTW_ROOT']
         include_dir, library_dir = join(root, 'include'), join(root, 'lib')
     else:
         include_dir = library_dir = ""
 
     # Settings stored on the environment before the tool is loaded
     env['FFTW_LIBRARY_WISH'] = ['main', 'omp']
     env['FFTW_INCLUDE_DIR'] = include_dir
     env['FFTW_LIBRARY_DIR'] = library_dir
     # NOTE(review): `fftw` is not defined in this file; presumably it is
     # provided by the project's SCons site directory - confirm.
     env.Tool(fftw)
 
 
 def detect_cuda(env):
     """Set up the CUDA toolkit path, nvcc flags and load the nvcc tool.
 
     The toolkit path is taken from ``CUDA_ROOT`` in the inherited shell
     environment when present and falls back to ``/opt/cuda`` otherwise.
     """
     shell_env = env['ENV']
     env['CUDA_TOOLKIT_PATH'] = (shell_env['CUDA_ROOT']
                                 if 'CUDA_ROOT' in shell_env
                                 else '/opt/cuda')
 
     env['CUDA_COMPONENTS'] = ['cufft']
     env['CUDA_ARCH_FLAG'] = '-arch=sm_35'
 
     palette = env['COLOR_DICT']
     if not env['verbose']:
         # Short colored status lines instead of the echoed command lines
         compile_msg = u'{0}[Compiling (cuda)] {1}$SOURCE{2}'
         link_msg = u'{0}[Linking (cuda)] {1}$TARGET{2}'
         env['NVCCCOMSTR'] = compile_msg.format(palette['green'],
                                                palette['blue'],
                                                palette['end'])
         env['SHLINKCOMSTR'] = link_msg.format(palette['purple'],
                                               palette['blue'],
                                               palette['end'])
 
     # Experimental extended-lambda and relaxed-constexpr support
     for flag in ("-expt-extended-lambda", "-expt-relaxed-constexpr"):
         env.AppendUnique(CXXFLAGS=flag)
     # -G is only added for debug builds
     if env['build_type'] == 'debug':
         env.AppendUnique(CXXFLAGS="-G")
     env.Tool('nvcc')
 
 
 def detect_boost(env):
     """Load the boost tool, using ``$BOOST_ROOT/include`` when available.
 
     ``BOOST_INCLUDE_DIR`` is only set when ``BOOST_ROOT`` exists in the
     inherited shell environment; otherwise the environment is left as is.
     """
     shell_env = env['ENV']
     if 'BOOST_ROOT' in shell_env:
         env['BOOST_INCLUDE_DIR'] = join(shell_env['BOOST_ROOT'], 'include')
     # NOTE(review): `boost` is not defined in this file; presumably it is
     # provided by the project's SCons site directory - confirm.
     env.Tool(boost)
 
 
 def detect_thrust(env):
     """Detect thrust on clusters
 
     The thrust headers are looked for in the CUDA toolkit include directory:
     ``$CUDA_ROOT/include`` when ``CUDA_ROOT`` is set in the inherited shell
     environment, ``/opt/cuda/include`` otherwise.
     """
     if 'CUDA_ROOT' in env['ENV']:
-        env['THRUST_INCLUDE_DIR'] = env['ENV']['CUDA_ROOT']
+        env['THRUST_INCLUDE_DIR'] = join(env['ENV']['CUDA_ROOT'], 'include')
     else:
         env['THRUST_INCLUDE_DIR'] = '/opt/cuda/include'
     # NOTE(review): `thrust` is not defined in this file; presumably it is
     # provided by the project's SCons site directory - confirm.
     env.Tool(thrust)
 
 
 def gen_print(action_string, color_string, env):
     """Create a command-printing callback producing terse colored output.
 
     Returns ``None`` when ``env['verbose']`` is set. Otherwise returns a
     function taking ``(command, target, source, env)`` that prints
     ``[action_string]`` in the color named ``color_string`` followed by the
     first target in blue. Colors are looked up in ``env['COLOR_DICT']`` of
     the environment passed to the callback, at call time.
     """
     def print_fun(command, target, source, env):
         palette = env['COLOR_DICT']
         fields = (palette[color_string],
                   action_string,
                   palette['blue'],
                   target[0],
                   palette['end'])
         print("{}[{}] {}{}{}".format(*fields))
 
     return None if env['verbose'] else print_fun
 
 
 # ANSI escape sequences used to color the compilation output
 colors = dict(
     cyan='\033[96m',
     purple='\033[95m',
     blue='\033[94m',
     green='\033[92m',
     yellow='\033[93m',
     red='\033[91m',
     end='\033[0m',
 )
 
 # The construction environment inherits every shell environment variable
 # (for CXX detection, etc.) and carries the color table along
 main_env = Environment(ENV=os.environ)
 main_env['COLOR_DICT'] = colors
 
 # Default compiler: $CXX from the shell when defined, g++ otherwise
 compiler_default = os.environ.get('CXX', 'g++')
 
 # Build variables, read from build-setup.conf
 vars = Variables('build-setup.conf')
 vars.Add(EnumVariable(
     'build_type', 'Build type', 'release',
     allowed_values=('release', 'profiling', 'debug'),
     ignorecase=2,
 ))
 vars.Add(EnumVariable(
     'backend', 'Thrust backend', 'omp',
     allowed_values=('omp', 'cuda'),
     ignorecase=2,
 ))
 vars.Add('prefix', 'Prefix where to install', '/usr/local')
 vars.Add('CXX', 'Compiler', compiler_default)
 vars.Add('py_exec', 'Python executable', 'python')
 vars.Add(BoolVariable('timer', 'Activate the timer possibilities', False))
 vars.Add(BoolVariable('verbose', 'Activate verbosity', False))
 vars.Add(BoolVariable('build_doc', 'Build documentation', False))
 vars.Add(BoolVariable('color', 'Color the non-verbose compilation output', False))
 
 # Apply the variables to the environment and expose them in the help text
 vars.Update(main_env)
 Help(vars.GenerateHelpText(main_env))
 
 # Persist every option, including those still at their default value
 with open('build-setup.conf', 'w') as setup:
     setup.writelines("{} = '{}'\n".format(name, main_env[name])
                      for name in vars.keys())
 
 # One build directory per build type
 build_type = main_env['build_type']
 build_dir = 'build-{}'.format(build_type)
 print("Building in " + build_dir)
 
 verbose = main_env['verbose']
 
 # Without the color option every escape sequence becomes an empty string;
 # the dict is modified in place rather than rebound
 if not main_env['color']:
     colors.update(dict.fromkeys(colors, ''))
 
 # Shared objects use the plain .o suffix
 main_env['SHOBJSUFFIX'] = '.o'
 
 # Unless verbose, show short colored messages instead of the command lines
 if not verbose:
     for comstr, label, color, node in (
             ('SHCXXCOMSTR', 'Compiling', 'green', '$SOURCE'),
             ('SHLINKCOMSTR', 'Linking', 'purple', '$TARGET'),
             ('SWIGCOMSTR', 'Swig', 'yellow', '$SOURCE')):
         main_env[comstr] = u'{0}[{1}] {2}{3}{4}'.format(colors[color],
                                                         label,
                                                         colors['blue'],
                                                         node,
                                                         colors['end'])
 
 # Header search paths ('#' stands for the top-level source directory)
 include_dirs = ['#/src']
 include_dirs += ['#/src/' + sub for sub in ('core',
                                             'bem',
                                             'surface',
                                             'python',
                                             'percolation',
                                             'model',
                                             'solvers',
                                             'gpu')]
 include_dirs.append('#/python')
 main_env.AppendUnique(CPPPATH=include_dirs)
 
 # Shared object extension (same value as already assigned earlier in this file)
 main_env['SHOBJSUFFIX'] = '.o'
 
 # OpenMP setup: the Intel compiler (icpc) gets -qopenmp and an empty library
 # entry, any other compiler gets -fopenmp and the gomp library
 if main_env['CXX'] == 'icpc':
     omp_libs, omp_flag = [''], '-qopenmp'
 else:
     omp_libs, omp_flag = ['gomp'], '-fopenmp'
 
 main_env.AppendUnique(LIBS=omp_libs)
 # main_env.AppendUnique(LINKFLAGS=[omp_flag])
 
 # Language standard, warnings and the OpenMP flag for C++ compilation
 base_cxxflags = ['-std=c++11', '-Wall']
 base_cxxflags.append(omp_flag)
 main_env.AppendUnique(CXXFLAGS=base_cxxflags)
 
 # Thrust device system matching the requested backend; the cuda backend
 # additionally defines USE_CUDA
 if main_env['backend'] == 'omp':
     main_env.AppendUnique(CPPDEFINES=["THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP"])
 elif main_env['backend'] == 'cuda':
     for define in ("THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA", 'USE_CUDA'):
         main_env.AppendUnique(CPPDEFINES=[define])
 
 # Adding compile flags defined in the shell environment (CXXFLAGS)
 shell_cxxflags = os.environ.get('CXXFLAGS')
 if shell_cxxflags is not None:
     main_env.AppendUnique(CXXFLAGS=Split(shell_cxxflags))
 
 # Optional preprocessor switches: timer support and debug build marker
 if main_env['timer']:
     main_env.AppendUnique(CPPDEFINES=['USING_TIMER'])
 if build_type == 'debug':
     main_env.AppendUnique(CPPDEFINES=['TAMAAS_DEBUG'])
 
 # Compiler flags for each build type
 cxxflags_dict = dict(
     debug=Split("-g -O0"),
     profiling=Split("-g -pg -O2"),
     release=Split("-O3"),
 )
 
 # Shared-library link flags for each build type
 shlinkflags_dict = dict(
     debug=[],
     profiling=['-pg'],
     release=[],
 )
 
 selected_cxxflags = cxxflags_dict[build_type]
 selected_shlinkflags = shlinkflags_dict[build_type]
 main_env.AppendUnique(CXXFLAGS=selected_cxxflags)
 main_env.AppendUnique(SHLINKFLAGS=selected_shlinkflags)
 
 # Libraries are searched in <build_dir>/src, which is also used as RPATH
 library_dir = abspath(join(build_dir, 'src'))
 main_env['LIBPATH'] = [library_dir]
 main_env['RPATH'] = "$LIBPATH"
 
 # Dependencies required by every backend
 for detect in (detect_fftw, detect_boost, detect_thrust):
     detect(main_env)
 
 # CUDA is only set up when the cuda backend is selected
 if main_env['backend'] == 'cuda':
     detect_cuda(main_env)
 
 # Writing information file
 write_info_file("src/tamaas_info.cpp")
 
 # Template of the environment script; {0} is replaced by the build directory
 env_content = ("export PYTHONPATH=$PYTHONPATH:{0}/python\n"
                "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{0}/src\n")
 
 
 def write_env_file(target, source, env):
     """Write the environment script to the first target.
 
     The module-level ``env_content`` template is filled in with the absolute
     path of ``build_dir``. ``source`` and ``env`` are not used.
     """
     script_path = str(target[0])
     with open(script_path, 'w') as script:
         build_root = abspath(build_dir)
         script.write(env_content.format(build_root))
 
 
 # Store the printer factory on the environment that is exported below
 main_env['gen_print'] = gen_print
 
 # Need to have a command and manage tamaas_environement.sh as target because
 # the build directory does not always exist
 writing_printer = gen_print("Writing", "cyan", main_env)
 env_file_env = main_env.Clone(PRINT_CMD_LINE_FUNC=writing_printer)
 env_script = join(build_dir, 'tamaas_environement.sh')
 env_file_env.Command(env_script, None, write_env_file)
 
 Export('main_env')
 
 
 # Building subdirs
 def subdir(dir):
     """Run ``<dir>/SConscript`` with ``<build_dir>/<dir>`` as variant directory."""
     script = join(dir, 'SConscript')
     variant = join(build_dir, dir)
     SConscript(script, variant_dir=variant, duplicate=True)
 
 
 # The src, python and tests sub-directories are always processed
 for component in ('src', 'python', 'tests'):
     subdir(component)
 
 # The doc sub-directory is only processed when build_doc is enabled
 if main_env['build_doc']:
     subdir('doc')
diff --git a/src/core/loop.hh b/src/core/loop.hh
index a77fbc0..3ec40b8 100644
--- a/src/core/loop.hh
+++ b/src/core/loop.hh
@@ -1,156 +1,171 @@
 /**
  * @file
  *
  * @author Lucas Frérot <lucas.frerot@epfl.ch>
  *
  * @section LICENSE
  *
  * Copyright (©)  2017 EPFL  (Ecole Polytechnique  Fédérale de
  * Lausanne)  Laboratory (LSMS  -  Laboratoire de  Simulation  en Mécanique  des
  * Solides)
  *
  * Tamaas is free  software: you can redistribute it and/or  modify it under the
  * terms  of the  GNU Lesser  General Public  License as  published by  the Free
  * Software Foundation, either version 3 of the License, or (at your option) any
  * later version.
  *
  * Tamaas is  distributed in the  hope that it  will be useful, but  WITHOUT ANY
  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  * A  PARTICULAR PURPOSE. See  the GNU  Lesser General  Public License  for more
  * details.
  *
  * You should  have received  a copy  of the GNU  Lesser General  Public License
  * along with Tamaas. If not, see <http://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #ifndef __LOOP_HH__
 #define __LOOP_HH__
 /* -------------------------------------------------------------------------- */
 #include "tamaas.hh"
 #include "loops/apply.hh"
 #include "loops/loop_utils.hh"
 #include <type_traits>
 #include <thrust/for_each.h>
 #include <thrust/transform_reduce.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/tuple.h>
 
 __BEGIN_TAMAAS__
 
 /**
  * @brief Static-only class for automated loops using lambdas
 
  * This class is sweet candy :) It provides abstraction of the parallelism
  * paradigm used in loops and allows simple and less error-prone loop syntax,
  * with minimum boilerplate. I love it <3
  *
  * All member functions are static and the constructor is deleted, so the
  * class cannot be instantiated.
  */
 class Loop {
 public:
   /// Backends enumeration
   enum backend {
     omp, ///< [OpenMP](http://www.openmp.org/specifications/) backend
     cuda, ///< [Cuda](http://docs.nvidia.com/cuda/index.html) backend
   };
 
   /// Helper class to count iterations within lambda-loop:
   /// exposes the range [0, size) through thrust counting iterators
   template <typename T>
   class arange {
   public:
     using it_type = thrust::counting_iterator<T>;
     /// Build a range of `size` consecutive values starting at 0
     arange(T size):range_size(size) {}
     /// Iterator on the value 0
     it_type begin() const { return it_type(T(0)); }
     /// Iterator on the value `size` (one past the last value)
     it_type end() const { return it_type(range_size); }
   private:
     T range_size; ///< value the end iterator is built from
   };
 
   /// Loop functor over any number of grids
   /// (func is called with one element of each container per iteration)
   template <typename Functor, typename... Grids>
   static void loop(Functor&& func, Grids&&... containers);
 
   /// Strided loop over any number of grids
   /// (iterators are obtained with begin/end(getNbComponents()))
   template <typename Functor, typename... Grids>
   static void stridedLoop(Functor&& func, Grids&&... containers);
 
   /// Reduce over any number of grids
   /// (the result type is the type of func applied to containers(0)...;
   /// `operation` is declared outside this header)
   template <operation op, typename Functor, typename... Grids>
   static auto reduce(Functor&& func, Grids&&... containers)
     -> decltype(func(containers(0)...));
 
   /// Strided reduce over any number of grids
   /// (same result type rule as reduce)
   template <operation op, typename Functor, typename... Grids>
   static auto stridedReduce(Functor&& func, Grids&&... containers)
     -> decltype(func(containers(0)...));
 
   /// Constructor (deleted: the class only provides static members)
   Loop() = delete;
 };
 
 
 /* -------------------------------------------------------------------------- */
 /* Template implementation                                                    */
 /* -------------------------------------------------------------------------- */
 
 /// Apply func to the zipped elements of all containers.
 ///
 /// The begin()/end() iterators of every container are zipped together and
 /// thrust::for_each calls func (through detail::ApplyFunctor, which unpacks
 /// the zipped tuple) once per position. When USE_CUDA is defined the device
 /// is synchronized before returning.
 template <typename Functor, typename... Grids>
 void Loop::loop(Functor&& func, Grids&&... containers) {
   auto begin = thrust::make_zip_iterator(thrust::make_tuple(containers.begin()...));
   auto end = thrust::make_zip_iterator(thrust::make_tuple(containers.end()...));
 
   thrust::for_each(begin, end, detail::ApplyFunctor<Functor>(func));
+
 // NOTE(review): the return code of cudaDeviceSynchronize() is ignored
+#ifdef USE_CUDA
+  cudaDeviceSynchronize();
+#endif
 }
 
 /* -------------------------------------------------------------------------- */
 
 /// Same as Loop::loop, but each container is traversed with the iterators
 /// returned by begin(n)/end(n), n being that container's getNbComponents().
 /// NOTE(review): presumably this visits one entry per point instead of one
 /// per component - confirm against the container's begin(n) definition.
 /// When USE_CUDA is defined the device is synchronized before returning.
 template <typename Functor, typename... Grids>
 void Loop::stridedLoop(Functor&& func, Grids&&... containers) {
   auto begin =
     thrust::make_zip_iterator(thrust::make_tuple(containers.begin(containers.getNbComponents())...));
   auto end =
     thrust::make_zip_iterator(thrust::make_tuple(containers.end(containers.getNbComponents())...));
 
   thrust::for_each(begin, end, detail::ApplyFunctor<Functor>(func));
 // NOTE(review): the return code of cudaDeviceSynchronize() is ignored
+#ifdef USE_CUDA
+  cudaDeviceSynchronize();
+#endif
 }
 
 /* -------------------------------------------------------------------------- */
 
 /// Reduce the values of func over the zipped elements of all containers.
 ///
 /// func is wrapped in detail::ApplyFunctor (which unpacks the zipped tuple)
 /// and then in detail::reduction_helper<op, ...>; the helper is passed to
 /// thrust::reduce together with the initial value it provides through
 /// init<reduce_type>(). The result type is the type of
 /// func(containers(0)...). When USE_CUDA is defined the device is
 /// synchronized before the result is returned.
 template <operation op, typename Functor, typename... Grids>
 auto Loop::reduce(Functor&& func, Grids&&... containers)
   -> decltype(func(containers(0)...)) {
   auto begin = thrust::make_zip_iterator(thrust::make_tuple(containers.begin()...));
   auto end = thrust::make_zip_iterator(thrust::make_tuple(containers.end()...));
 
   using reduce_type = decltype(func(containers(0)...));
   using apply_type = detail::ApplyFunctor<Functor, reduce_type>;
 
   auto red_helper = detail::reduction_helper<op, apply_type>(apply_type(func));
+  auto result = thrust::reduce(begin, end, red_helper.template init<reduce_type>(), red_helper);
 
-  return thrust::reduce(begin, end, red_helper.template init<reduce_type>(), red_helper);
 // NOTE(review): the return code of cudaDeviceSynchronize() is ignored
+#ifdef USE_CUDA
+  cudaDeviceSynchronize();
+#endif
+  return result;
 }
 
 /* -------------------------------------------------------------------------- */
 
 /// Same as Loop::reduce, but each container is traversed with the iterators
 /// returned by begin(n)/end(n), n being that container's getNbComponents().
 /// The result type is the type of func(containers(0)...). When USE_CUDA is
 /// defined the device is synchronized before the result is returned.
 template <operation op, typename Functor, typename... Grids>
 auto Loop::stridedReduce(Functor&& func, Grids&&... containers)
   -> decltype(func(containers(0)...)) {
   auto begin = thrust::make_zip_iterator(thrust::make_tuple(containers.begin(containers.getNbComponents())...));
   auto end = thrust::make_zip_iterator(thrust::make_tuple(containers.end(containers.getNbComponents())...));
 
   using reduce_type = decltype(func(containers(0)...));
   using apply_type = detail::ApplyFunctor<Functor, reduce_type>;
 
   auto red_helper = detail::reduction_helper<op, apply_type>(apply_type(func));
 
-  return thrust::reduce(begin, end, red_helper.template init<reduce_type>(), red_helper);
+  auto result = thrust::reduce(begin, end, red_helper.template init<reduce_type>(), red_helper);
 
 // NOTE(review): the return code of cudaDeviceSynchronize() is ignored
+#ifdef USE_CUDA
+  cudaDeviceSynchronize();
+#endif
+  return result;
 }
 
 /* -------------------------------------------------------------------------- */
 
 __END_TAMAAS__
 
 #undef EXEC_CASE_MACRO
 #undef REDUCE_CASE_MACRO
 
 #endif // __LOOP_HH__
diff --git a/src/core/loops/apply.hh b/src/core/loops/apply.hh
index a73b0ed..aa4440e 100644
--- a/src/core/loops/apply.hh
+++ b/src/core/loops/apply.hh
@@ -1,147 +1,149 @@
 /**
  * @file
  *
  * @author Lucas Frérot <lucas.frerot@epfl.ch>
  *
  * @section LICENSE
  *
  * Copyright (©)  2017 EPFL  (Ecole Polytechnique  Fédérale de
  * Lausanne)  Laboratory (LSMS  -  Laboratoire de  Simulation  en Mécanique  des
  * Solides)
  *
  * Tamaas is free  software: you can redistribute it and/or  modify it under the
  * terms  of the  GNU Lesser  General Public  License as  published by  the Free
  * Software Foundation, either version 3 of the License, or (at your option) any
  * later version.
  *
  * Tamaas is  distributed in the  hope that it  will be useful, but  WITHOUT ANY
  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  * A  PARTICULAR PURPOSE. See  the GNU  Lesser General  Public License  for more
  * details.
  *
  * You should  have received  a copy  of the GNU  Lesser General  Public License
  * along with Tamaas. If not, see <http://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #ifndef __APPLY_HH__
 #define __APPLY_HH__
 /* -------------------------------------------------------------------------- */
 #include "tamaas.hh"
 #include <thrust/tuple.h>
 #include <utility>
 #include <cstddef>
 /* -------------------------------------------------------------------------- */
 
 __BEGIN_TAMAAS__
 
 namespace detail {
 
 /// Helper function for application of a functor on a thrust::tuple
 ///
 /// Apply<N>::apply(func, t) calls func with the first N elements of the
 /// tuple t, extracted with thrust::get<0>(t) ... thrust::get<N-1>(t), and
 /// returns what func returns. The primary template is only declared:
 /// explicit specializations are provided for N = 0 to 5, so any other
 /// arity fails to compile.
 template <size_t nargs>
 struct Apply;
 
 /// No argument: func is called without arguments, the tuple is unused
 template <>
 struct Apply<0> {
   template <typename Functor, typename Tuple>
   __host__ __device__
   static auto apply(Functor&& func, Tuple&& t [[gnu::unused]])
     -> decltype(func()) {
     return func();
   }
 };
 
 /// One argument: func(get<0>(t))
 template <>
 struct Apply<1> {
   template <typename Functor, typename Tuple>
   __host__ __device__
   static auto apply(Functor&& func, Tuple&& t)
     -> decltype(func(thrust::get<0>(std::forward<Tuple>(t)))) {
     return func(thrust::get<0>(std::forward<Tuple>(t)));
   }
 };
 
 /// Two arguments: func(get<0>(t), get<1>(t))
 template <>
 struct Apply<2> {
   template <typename Functor, typename Tuple>
   __host__ __device__
   static auto apply(Functor&& func, Tuple&& t)
     -> decltype(func(thrust::get<0>(std::forward<Tuple>(t)),
 		     thrust::get<1>(std::forward<Tuple>(t)))) {
     return func(thrust::get<0>(std::forward<Tuple>(t)),
 		thrust::get<1>(std::forward<Tuple>(t)));
   }
 };
 
 /// Three arguments: func(get<0>(t), get<1>(t), get<2>(t))
 template <>
 struct Apply<3> {
   template <typename Functor, typename Tuple>
   __host__ __device__
   static auto apply(Functor&& func, Tuple&& t)
     -> decltype(func(thrust::get<0>(std::forward<Tuple>(t)),
 		     thrust::get<1>(std::forward<Tuple>(t)),
 		     thrust::get<2>(std::forward<Tuple>(t)))) {
     return func(thrust::get<0>(std::forward<Tuple>(t)),
 		thrust::get<1>(std::forward<Tuple>(t)),
 		thrust::get<2>(std::forward<Tuple>(t)));
   }
 };
 
 /// Four arguments: func(get<0>(t), ..., get<3>(t))
 template <>
 struct Apply<4> {
   template <typename Functor, typename Tuple>
   __host__ __device__
   static auto apply(Functor&& func, Tuple&& t)
     -> decltype(func(thrust::get<0>(std::forward<Tuple>(t)),
 		     thrust::get<1>(std::forward<Tuple>(t)),
 		     thrust::get<2>(std::forward<Tuple>(t)),
 		     thrust::get<3>(std::forward<Tuple>(t)))) {
     return func(thrust::get<0>(std::forward<Tuple>(t)),
 		thrust::get<1>(std::forward<Tuple>(t)),
 		thrust::get<2>(std::forward<Tuple>(t)),
 		thrust::get<3>(std::forward<Tuple>(t)));
   }
 };
 
 /// Five arguments: func(get<0>(t), ..., get<4>(t))
 template <>
 struct Apply<5> {
   template <typename Functor, typename Tuple>
   __host__ __device__
   static auto apply(Functor&& func, Tuple&& t)
     -> decltype(func(thrust::get<0>(std::forward<Tuple>(t)),
 		     thrust::get<1>(std::forward<Tuple>(t)),
 		     thrust::get<2>(std::forward<Tuple>(t)),
 		     thrust::get<3>(std::forward<Tuple>(t)),
 		     thrust::get<4>(std::forward<Tuple>(t)))) {
     return func(thrust::get<0>(std::forward<Tuple>(t)),
 		thrust::get<1>(std::forward<Tuple>(t)),
 		thrust::get<2>(std::forward<Tuple>(t)),
 		thrust::get<3>(std::forward<Tuple>(t)),
 		thrust::get<4>(std::forward<Tuple>(t)));
   }
 };
 
 /// Helper class for functor application in thrust
 ///
 /// Wraps a functor so that it can be called with a single thrust::tuple:
 /// operator() selects Apply<N>, N being the size of the tuple type, which
 /// passes the tuple elements to the functor as separate arguments. The
 /// result is returned as ret_type (void by default).
 ///
 /// NOTE(review): the functor is stored as a reference, not a copy, so the
 /// wrapped object must outlive this wrapper and every copy of it. Whether
 /// that reference is still usable once the wrapper has been copied for
 /// device execution should be confirmed.
 template <typename Functor, typename ret_type = void>
 class ApplyFunctor {
 
 public:
   /// Wrap functor (kept by reference)
+  __host__ __device__
   ApplyFunctor(const Functor & functor):functor(functor) {}
   /// Copy: the new wrapper refers to the same functor as o
+  __host__ __device__
   ApplyFunctor(const ApplyFunctor & o):functor(o.functor) {}
 
   /// Unpack the tuple t and call the wrapped functor with its elements
   template <typename Tuple>
   __host__ __device__
   ret_type operator()(Tuple&& t) const {
     return Apply<thrust::tuple_size<typename std::remove_reference<Tuple>::type>::value>::
       apply(functor, std::forward<Tuple>(t));
   }
 
 private:
   const Functor & functor; ///< reference to the wrapped functor (not owned)
 };
 
 } // namespace detail
 
 __END_TAMAAS__
 
 #endif