/**
* @file
*
* @author Lucas Frérot <lucas.frerot@epfl.ch>
*
* @section LICENSE
*
* Copyright (©) 2017 EPFL (Ecole Polytechnique Fédérale de
* Lausanne) Laboratory (LSMS - Laboratoire de Simulation en Mécanique des
* Solides)
*
* Tamaas is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option) any
* later version.
*
* Tamaas is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Tamaas. If not, see <http://www.gnu.org/licenses/>.
*
*/
/* -------------------------------------------------------------------------- */
#ifndef __LOOP_HH__
#define __LOOP_HH__
/* -------------------------------------------------------------------------- */
#include "loops/apply.hh"
#include "loops/loop_utils.hh"
#include "tamaas.hh"
#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>
#include <thrust/transform_reduce.h>
#include <thrust/tuple.h>
#include <type_traits>
__BEGIN_TAMAAS__
template <typename T>
struct is_policy : std::false_type {};
template <>
struct is_policy<thrust::detail::host_t> : std::true_type {};
template <>
struct is_policy<thrust::detail::device_t> : std::true_type {};
template <>
struct is_policy<const thrust::detail::host_t> : std::true_type {};
template <>
struct is_policy<const thrust::detail::device_t> : std::true_type {};
template <>
struct is_policy<const thrust::detail::host_t&> : std::true_type {};
template <>
struct is_policy<const thrust::detail::device_t&> : std::true_type {};
/**
* @brief Singleton class for automated loops using lambdas
* This class is sweet candy :) It provides abstraction of the parallelism
* paradigm used in loops and allows simple and less error-prone loop syntax,
* with minimum boilerplate. I love it <3
*/
class Loop {
public:
/// Backends enumeration
enum backend {
omp, ///< [OpenMP](http://www.openmp.org/specifications/) backend
cuda, ///< [Cuda](http://docs.nvidia.com/cuda/index.html) backend
};
/// Helper class to count iterations within lambda-loop
template <typename T>
class arange {
public:
using it_type = thrust::counting_iterator<T>;
arange(T size) : range_size(size) {}
it_type begin(UInt i = 0) const { return it_type(T(0)); }
it_type end(UInt i = 0) const { return it_type(range_size); }
UInt getNbComponents() const { return 1; }
private:
T range_size;
};
template <typename T>
static arange<T> range(T size) {
return arange<T>(size);
}
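// Illustration (a sketch, assuming a Grid-like container `grid` of size n
// providing begin()/end()): zipping a range with the grid exposes the
// iteration index to the lambda, e.g.
//   Loop::loop([](UInt i, Real& x) { x = i; }, Loop::range(n), grid);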
/// Loop functor over any number of grids
template <typename Functor, typename... Grids>
static void loop(Functor&& func, Grids&&... containers);
/// Strided loop over any number of grids with parallel policy
template <typename DerivedPolicy, typename Functor, typename... Grids>
static void stridedLoop(const thrust::execution_policy<DerivedPolicy>& policy,
Functor&& func, Grids&&... containers) {
stridedLoopImpl(policy, std::forward<Functor>(func),
std::forward<Grids>(containers)...);
}
/// Strided loop over any number of grids
template <typename Functor, typename... Grids>
static typename std::enable_if<not is_policy<Functor>::value, void>::type
stridedLoop(Functor&& func, Grids&&... containers) {
stridedLoopImpl(thrust::device, std::forward<Functor>(func),
std::forward<Grids>(containers)...);
}
private:
/// Implementation of strided loop overloads
template <typename DerivedPolicy, typename Functor, typename... Grids>
static void
stridedLoopImpl(const thrust::execution_policy<DerivedPolicy>& policy,
Functor&& func, Grids&&... containers);
public:
/// Reduce over any number of grids
template <operation op, typename Functor, typename... Grids>
static auto reduce(Functor&& func, Grids&&... containers)
-> decltype(func(containers(0)...));
/// Strided reduce over any number of grids
template <operation op, typename Functor, typename... Grids>
static auto stridedReduce(Functor&& func, Grids&&... containers)
-> decltype(func(containers(0)...));
/// Constructor
Loop() = delete;
};
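/* -------------------------------------------------------------------------- */
/*
 * Usage sketch. The Grid containers, the `Real` scalar type and the
 * `operation::plus` enumerator are assumed here for illustration only:
 *
 *   // element-wise sum of two grids, accumulated into grid_a
 *   Loop::loop([](Real& x, const Real& y) { x += y; }, grid_a, grid_b);
 *
 *   // reduction: squared L2 norm of grid_a
 *   Real norm2 = Loop::reduce<operation::plus>(
 *       [](const Real& x) { return x * x; }, grid_a);
 */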
/* -------------------------------------------------------------------------- */
/* Template implementation */
/* -------------------------------------------------------------------------- */
template <typename Functor, typename... Grids>
void Loop::loop(Functor&& func, Grids&&... containers) {
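// Zip all container iterators so one thrust::for_each call walks them in
// lockstep; detail::ApplyFunctor unpacks each zipped tuple into the functor's
// argument list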
auto begin =
thrust::make_zip_iterator(thrust::make_tuple(containers.begin()...));
auto end = thrust::make_zip_iterator(thrust::make_tuple(containers.end()...));
thrust::for_each(begin, end, detail::ApplyFunctor<Functor>(func));
#ifdef USE_CUDA
cudaDeviceSynchronize();
#endif
}
/* -------------------------------------------------------------------------- */
template <typename DevicePolicy, typename Functor, typename... Grids>
void Loop::stridedLoopImpl(const thrust::execution_policy<DevicePolicy>& policy,
Functor&& func, Grids&&... containers) {
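// Same zip-iterator scheme as loop(), but begin()/end() are given the number
// of components of each container, so the iteration is strided over whole
// points rather than individual scalar entries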
auto begin = thrust::make_zip_iterator(
thrust::make_tuple(containers.begin(containers.getNbComponents())...));
auto end = thrust::make_zip_iterator(
thrust::make_tuple(containers.end(containers.getNbComponents())...));
thrust::for_each(policy, begin, end, detail::ApplyFunctor<Functor>(func));
#ifdef USE_CUDA
cudaDeviceSynchronize();
#endif
}
/* -------------------------------------------------------------------------- */
template <operation op, typename Functor, typename... Grids>
auto Loop::reduce(Functor&& func, Grids&&... containers)
-> decltype(func(containers(0)...)) {
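// reduction_helper wraps the functor application and provides both the
// initial (identity) value and the binary operator for thrust::reduce,
// according to the requested operation op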
auto begin =
thrust::make_zip_iterator(thrust::make_tuple(containers.begin()...));
auto end = thrust::make_zip_iterator(thrust::make_tuple(containers.end()...));
using reduce_type = decltype(func(containers(0)...));
using apply_type = detail::ApplyFunctor<Functor, reduce_type>;
auto red_helper = detail::reduction_helper<op, apply_type>(apply_type(func));
auto result = thrust::reduce(
begin, end, red_helper.template init<reduce_type>(), red_helper);
#ifdef USE_CUDA
cudaDeviceSynchronize();
#endif
return result;
}
/* -------------------------------------------------------------------------- */
template <operation op, typename Functor, typename... Grids>
auto Loop::stridedReduce(Functor&& func, Grids&&... containers)
-> decltype(func(containers(0)...)) {
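// Identical to reduce(), except the zipped iterators are strided by each
// container's number of components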
auto begin = thrust::make_zip_iterator(
thrust::make_tuple(containers.begin(containers.getNbComponents())...));
auto end = thrust::make_zip_iterator(
thrust::make_tuple(containers.end(containers.getNbComponents())...));
using reduce_type = decltype(func(containers(0)...));
using apply_type = detail::ApplyFunctor<Functor, reduce_type>;
auto red_helper = detail::reduction_helper<op, apply_type>(apply_type(func));
auto result = thrust::reduce(
begin, end, red_helper.template init<reduce_type>(), red_helper);
#ifdef USE_CUDA
cudaDeviceSynchronize();
#endif
return result;
}
/* -------------------------------------------------------------------------- */
__END_TAMAAS__
#undef EXEC_CASE_MACRO
#undef REDUCE_CASE_MACRO
#endif // __LOOP_HH__
