diff --git a/src/core/loop.hh b/src/core/loop.hh
index 6414ff8..4e37c1b 100644
--- a/src/core/loop.hh
+++ b/src/core/loop.hh
@@ -1,221 +1,226 @@
 /**
  * @file
  *
  * @author Lucas Frérot <lucas.frerot@epfl.ch>
  *
  * @section LICENSE
  *
  * Copyright (©)  2017 EPFL  (Ecole Polytechnique  Fédérale de
  * Lausanne)  Laboratory (LSMS  -  Laboratoire de  Simulation  en Mécanique  des
  * Solides)
  *
  * Tamaas is free  software: you can redistribute it and/or  modify it under the
  * terms  of the  GNU Lesser  General Public  License as  published by  the Free
  * Software Foundation, either version 3 of the License, or (at your option) any
  * later version.
  *
  * Tamaas is  distributed in the  hope that it  will be useful, but  WITHOUT ANY
  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  * A  PARTICULAR PURPOSE. See  the GNU  Lesser General  Public License  for more
  * details.
  *
  * You should  have received  a copy  of the GNU  Lesser General  Public License
  * along with Tamaas. If not, see <http://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #ifndef __LOOP_HH__
 #define __LOOP_HH__
 /* -------------------------------------------------------------------------- */
 #include "loops/apply.hh"
 #include "loops/loop_utils.hh"
 #include "tamaas.hh"
 #include <thrust/execution_policy.h>
 #include <thrust/for_each.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/transform_reduce.h>
 #include <thrust/tuple.h>
 #include <type_traits>
 
 __BEGIN_TAMAAS__
 
 template <typename T>
 struct is_policy : std::false_type {};
 template <>
 struct is_policy<thrust::detail::host_t> : std::true_type {};
 template <>
 struct is_policy<thrust::detail::device_t> : std::true_type {};
 template <>
 struct is_policy<const thrust::detail::host_t> : std::true_type {};
 template <>
 struct is_policy<const thrust::detail::device_t> : std::true_type {};
 template <>
 struct is_policy<const thrust::detail::host_t&> : std::true_type {};
 template <>
 struct is_policy<const thrust::detail::device_t&> : std::true_type {};
 
 /**
  * @brief Singleton class for automated loops using lambdas
 
  * This class is sweet candy :) It provides abstraction of the paralelism
  * paradigm used in loops and allows simple and less erro-prone loop syntax,
  * with minimum boiler plate. I love it <3
  */
 class Loop {
 public:
   /// Backends enumeration
   enum backend {
     omp,   ///< [OpenMP](http://www.openmp.org/specifications/) backend
     cuda,  ///< [Cuda](http://docs.nvidia.com/cuda/index.html) backend
   };
 
   /// Helper class to count iterations within lambda-loop
   template <typename T>
   class arange {
   public:
     using it_type = thrust::counting_iterator<T>;
-    arange(T size) : range_size(size) {}
-    it_type begin(UInt i = 0) const { return it_type(T(0)); }
+    arange(T start, T size) : start(start), range_size(size) {}
+    it_type begin(UInt i = 0) const { return it_type(T(start)); }
     it_type end(UInt i = 0) const { return it_type(range_size); }
     UInt getNbComponents() const { return 1; }
 
   private:
-    T range_size;
+    T start, range_size;
   };
 
   template <typename T>
   static arange<T> range(T size) {
-    return arange<T>(size);
+    return arange<T>(0, size);
+  }
+
+  template <typename T, typename U>
+  static arange<T> range(U start, T size) {
+    return arange<T>(start, size);
   }
 
   /// Loop functor over any number of grids
   template <typename Functor, typename... Grids>
   static void loop(Functor&& func, Grids&&... containers);
 
   /// Strided loop over any number of grids with parallel policy
   template <typename DerivedPolicy, typename Functor, typename... Grids>
   static void stridedLoop(const thrust::execution_policy<DerivedPolicy>& policy,
                           Functor&& func, Grids&&... containers) {
     stridedLoopImpl(policy, std::forward<Functor>(func),
                     std::forward<Grids>(containers)...);
   }
 
   /// Strided loop over any number of grids
   template <typename Functor, typename... Grids>
   static typename std::enable_if<not is_policy<Functor>::value, void>::type
   stridedLoop(Functor&& func, Grids&&... containers) {
     stridedLoopImpl(thrust::device, std::forward<Functor>(func),
                     std::forward<Grids>(containers)...);
   }
 
 private:
   /// Implementation of strided loop overloads
   template <typename DerivedPolicy, typename Functor, typename... Grids>
   static void
   stridedLoopImpl(const thrust::execution_policy<DerivedPolicy>& policy,
                   Functor&& func, Grids&&... containers);
 
 public:
   /// Reduce over any number of grids
   template <operation op, typename Functor, typename... Grids>
   static auto reduce(Functor&& func, Grids&&... containers)
       -> decltype(func(containers(0)...));
 
   /// Strided reduce over any number of grids
   template <operation op, typename Functor, typename... Grids>
   static auto stridedReduce(Functor&& func, Grids&&... containers)
       -> decltype(func(containers(0)...));
 
   /// Constructor
   Loop() = delete;
 };
 
 /* -------------------------------------------------------------------------- */
 /* Template implementation                                                    */
 /* -------------------------------------------------------------------------- */
 
 template <typename Functor, typename... Grids>
 void Loop::loop(Functor&& func, Grids&&... containers) {
   auto begin =
       thrust::make_zip_iterator(thrust::make_tuple(containers.begin()...));
   auto end = thrust::make_zip_iterator(thrust::make_tuple(containers.end()...));
 
   thrust::for_each(begin, end, detail::ApplyFunctor<Functor>(func));
 
 #ifdef USE_CUDA
   cudaDeviceSynchronize();
 #endif
 }
 
 /* -------------------------------------------------------------------------- */
 
 template <typename DevicePolicy, typename Functor, typename... Grids>
 void Loop::stridedLoopImpl(
     const thrust::execution_policy<DevicePolicy>& /*policy*/, Functor&& func,
     Grids&&... containers) {
   auto begin = thrust::make_zip_iterator(
       thrust::make_tuple(containers.begin(containers.getNbComponents())...));
   auto end = thrust::make_zip_iterator(
       thrust::make_tuple(containers.end(containers.getNbComponents())...));
 
   thrust::for_each(begin, end, detail::ApplyFunctor<Functor>(func));
 #ifdef USE_CUDA
   cudaDeviceSynchronize();
 #endif
 }
 
 /* -------------------------------------------------------------------------- */
 
 template <operation op, typename Functor, typename... Grids>
 auto Loop::reduce(Functor&& func, Grids&&... containers)
     -> decltype(func(containers(0)...)) {
   auto begin =
       thrust::make_zip_iterator(thrust::make_tuple(containers.begin()...));
   auto end = thrust::make_zip_iterator(thrust::make_tuple(containers.end()...));
 
   using reduce_type = decltype(func(containers(0)...));
   using apply_type = detail::ApplyFunctor<Functor, reduce_type>;
 
   auto red_helper = detail::reduction_helper<op, apply_type>(apply_type(func));
   auto result = thrust::reduce(
       begin, end, red_helper.template init<reduce_type>(), red_helper);
 
 #ifdef USE_CUDA
   cudaDeviceSynchronize();
 #endif
   return result;
 }
 
 /* -------------------------------------------------------------------------- */
 
 template <operation op, typename Functor, typename... Grids>
 auto Loop::stridedReduce(Functor&& func, Grids&&... containers)
     -> decltype(func(containers(0)...)) {
   auto begin = thrust::make_zip_iterator(
       thrust::make_tuple(containers.begin(containers.getNbComponents())...));
   auto end = thrust::make_zip_iterator(
       thrust::make_tuple(containers.end(containers.getNbComponents())...));
 
   using reduce_type = decltype(func(containers(0)...));
   using apply_type = detail::ApplyFunctor<Functor, reduce_type>;
 
   auto red_helper = detail::reduction_helper<op, apply_type>(apply_type(func));
 
   auto result = thrust::reduce(
       begin, end, red_helper.template init<reduce_type>(), red_helper);
 
 #ifdef USE_CUDA
   cudaDeviceSynchronize();
 #endif
   return result;
 }
 
 /* -------------------------------------------------------------------------- */
 
 __END_TAMAAS__
 
 #undef EXEC_CASE_MACRO
 #undef REDUCE_CASE_MACRO
 
 #endif  // __LOOP_HH__
diff --git a/tests/test_loop.cpp b/tests/test_loop.cpp
index afc593e..064d694 100644
--- a/tests/test_loop.cpp
+++ b/tests/test_loop.cpp
@@ -1,263 +1,263 @@
 /**
  *
  * @author Lucas Frérot <lucas.frerot@epfl.ch>
  *
  * @section LICENSE
  *
  * Copyright (©)  2017 EPFL  (Ecole Polytechnique  Fédérale de
  * Lausanne)  Laboratory (LSMS  -  Laboratoire de  Simulation  en Mécanique  des
  * Solides)
  *
  * Tamaas is free  software: you can redistribute it and/or  modify it under the
  * terms  of the  GNU Lesser  General Public  License as  published by  the Free
  * Software Foundation, either version 3 of the License, or (at your option) any
  * later version.
  *
  * Tamaas is  distributed in the  hope that it  will be useful, but  WITHOUT ANY
  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  * A  PARTICULAR PURPOSE. See  the GNU  Lesser General  Public License  for more
  * details.
  *
  * You should  have received  a copy  of the GNU  Lesser General  Public License
  * along with Tamaas. If not, see <http://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #include "test.hh"
 #include "grid.hh"
 #include "static_types.hh"
 #include "loop.hh"
 /* -------------------------------------------------------------------------- */
 /* WARNING: here we cannot use lambdas for tests because GoogleTest declares  */
 /* test functions as private members of classes which is incompatible with    */
 /* cuda's extended lambdas. I know... it's f*cking stupid                     */
 /* -------------------------------------------------------------------------- */
 
 using namespace tamaas;
 
 template <typename T>
 struct AddOneInplace {
   CUDA_LAMBDA void operator()(T& x) { x += 1; }
 };
 
 // Testing loops on one grid
 TEST(TestLoops, OneArgument) {
   Grid<Real, 1> grid({20}, 1);
   Grid<Real, 1> solution({20}, 1);
 
   auto add_one = [] (Real & x) { return x + 1; };
 
   std::iota(grid.begin(), grid.end(), 1);
 
   // Makeing solution
   std::transform(grid.begin(), grid.end(), solution.begin(), add_one);
 
   auto add_one_inplace = AddOneInplace<Real>();
 
   Loop::loop(add_one_inplace, grid);
 
   ASSERT_TRUE(compare(grid, solution, AreFloatEqual())) << "One argument loop failed";
 }
 
 struct PrimalTest {
   CUDA_LAMBDA void operator()(Int& primal, Int& val) {
     val = (primal > 0)? -1:1;
   }
 };
 
 // Testing loops on two grids
 TEST(TestLoops, TwoArguments) {
   // Why no ints?
   Grid<Int, 2> grid({20, 20}, 1);
   Grid<Int, 2> primal({20, 20}, 1);
   Grid<Int, 2> solution({20, 20}, 1);
 
   primal(0, 0) = 1; primal(0, 1) = 1;
   primal(1, 0) = 1; primal(1, 1) = 1;
 
   std::transform(primal.begin(), primal.end(), solution.begin(),
 		 [] (Int & primal) {
 		   return (primal > 0)? -1:1;
 		 });
 
   auto primal_test = PrimalTest();
 
   Loop::loop(primal_test, primal, grid);
 
   ASSERT_TRUE(compare(solution, grid)) << "Two argument loop failed";
 }
 
 struct AssignUInt {
   CUDA_LAMBDA void operator()(UInt& x, UInt i) {
     x = i;
   }
 };
 
 // Testing an enumeration
 TEST(TestLoops, Enumeration) {
   Grid<UInt, 1> grid({100}, 1);
   Grid<UInt, 1> solution({100}, 1);
   std::iota(solution.begin(), solution.end(), 0);
 
   auto assign_uint = AssignUInt();
 
-  Loop::loop(assign_uint, grid, Loop::arange<UInt>(100));
+  Loop::loop(assign_uint, grid, Loop::range(100));
 
   ASSERT_TRUE(compare(solution, grid)) << "Enumeration loop failed";
 }
 
 /* -------------------------------------------------------------------------- */
 
 struct Identity {
   CUDA_LAMBDA UInt operator()(UInt& x) const { return x; }
 };
 
 // Testing one grid reductions
 TEST(TestReductions, OneArgument) {
   Grid<UInt, 1> grid({6}, 1);
   std::iota(grid.begin(), grid.end(), 1);
 
   const auto id = Identity();
 
   // Sum reduction
   UInt sol = std::accumulate(grid.begin(), grid.end(), 0, std::plus<UInt>());
   UInt red = Loop::reduce<operation::plus>(id, grid);
   ASSERT_TRUE(sol == red) << "Addition reduction failed on one argument";
 
   // Product reduction
   sol = std::accumulate(grid.begin(), grid.end(), 1, std::multiplies<UInt>());
   red = Loop::reduce<operation::times>(id, grid);
   ASSERT_TRUE(sol == red) << "Multiplication reduction failed on one argument";
 
   // Min reduction
   sol = *std::min_element(grid.begin(), grid.end());
   red = Loop::reduce<operation::min>(id, grid);
   ASSERT_TRUE(sol == red) << "Min reduction failed on one argument";
 
   // Max reduction
   sol = *std::max_element(grid.begin(), grid.end());
   red = Loop::reduce<operation::max>(id, grid);
   ASSERT_TRUE(sol == red) << "Max reduction failed on one argument";
 }
 
 struct PrimalReduce {
   CUDA_LAMBDA UInt operator()(UInt& p, UInt& val) {
     return (p > 0)? val:0;
   }
 };
 
 TEST(TestReductions, TwoArguments) {
   Grid<UInt, 1> grid({20}, 1);
   Grid<UInt, 1> primal({20}, 1);
   grid = 1;
   primal(0) = 1;
   primal(1) = 1;
 
   auto primal_reduce = PrimalReduce();
 
   // Reduce on values where primal > 0
   UInt red = Loop::reduce<operation::plus>(primal_reduce, primal, grid);
   ASSERT_TRUE(red == 2) << "Two args reduction failed";
 }
 
 /* -------------------------------------------------------------------------- */
 
 TEST(TestStridedLoops, OneArgument) {
   Grid<UInt, 2> grid({10, 10}, 2);
   std::iota(grid.begin(), grid.end(), 1);
 
   Grid<UInt, 2> solution({10, 10}, 2);
   solution = grid;
 
   std::for_each(solution.begin(), solution.end(), [] (UInt & x) {
       if (x % 2 == 1)
 	x += 1;
     });
 
   auto add_one_inplace = AddOneInplace<UInt>();
 
   Loop::stridedLoop(add_one_inplace, grid);
 
   ASSERT_TRUE(compare(solution, grid)) << "One argument strided loop failed";
 }
 
 template <typename T> using WrapVector = VectorProxy<T, 2>;
 
 struct AddOneVector {
   CUDA_LAMBDA void operator()(WrapVector<UInt>&& x) {
     x(0) += 1;
   }
 };
 
 TEST(TestStridedLoops, VectorStride) {
   Grid<UInt, 2> grid({10, 10}, 2);
   std::iota(grid.begin(), grid.end(), 1);
 
   Grid<UInt, 2> solution({10, 10}, 2);
   solution = grid;
 
   std::for_each(solution.begin(), solution.end(), [] (UInt & x) {
       if (x % 2 == 1)
 	x += 1;
     });
 
   auto add_one_inplace = AddOneVector();
 
   Loop::stridedLoop(add_one_inplace, grid);
 
   ASSERT_TRUE(compare(solution, grid)) << "Static vector strided loop failed";
 }
 
 template <typename T> using WrapMatrix = MatrixProxy<T, 2, 2>;
 
 struct SetOneMatrix {
   CUDA_LAMBDA void operator()(WrapMatrix<UInt>&& x) {
     x(0, 0) = 1;
     x(1, 1) = 1;
   }
 };
 
 TEST(TestStridedLoops, MatrixStride) {
   Grid<UInt, 2> grid({10, 10}, 4);
   Grid<UInt, 2> solution({10, 10}, 4);
   std::iota(solution.begin(), solution.end(), 0);
 
   std::for_each(solution.begin(), solution.end(), [] (UInt & x) {
       if (x % 4 == 0 || x % 4 == 3)
 	x = 1;
       else
 	x = 0;
     });
 
   auto set_one = SetOneMatrix();
 
   Loop::stridedLoop(set_one, grid);
 
   ASSERT_TRUE(compare(solution, grid)) << "Static matrix strided loop failed";
 }
 
 struct VectorReduction {
   CUDA_LAMBDA Vector<UInt, 3> operator()(VectorProxy<UInt, 3>&& v) const {
     return v;
   }
 };
 
 struct BroadcastSet123 {
   CUDA_LAMBDA inline void operator()(VectorProxy<UInt, 3>&& v) const {
     v(0) = 1;
     v(1) = 2;
     v(2) = 3;
   }
 };
 
 TEST(TestStridedReduction, VectorReduce) {
   Grid<UInt, 2> grid({10, 10}, 3);
 
   Loop::stridedLoop(BroadcastSet123(), grid);
 
   auto res = Loop::stridedReduce<operation::plus>(VectorReduction(), grid);
 
   ASSERT_EQ(res(0), 100);
   ASSERT_EQ(res(1), 200);
   ASSERT_EQ(res(2), 300);
 }