Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F66544799
loop.hh
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Jun 11, 06:53
Size
7 KB
Mime Type
text/x-c++
Expires
Thu, Jun 13, 06:53 (2 d)
Engine
blob
Format
Raw Data
Handle
18237995
Attached To
rTAMAAS tamaas
loop.hh
View Options
/**
* @file
*
* @author Lucas Frérot <lucas.frerot@epfl.ch>
*
* @section LICENSE
*
* Copyright (©) 2017 EPFL (Ecole Polytechnique Fédérale de
* Lausanne) Laboratory (LSMS - Laboratoire de Simulation en Mécanique des
* Solides)
*
* Tamaas is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option) any
* later version.
*
* Tamaas is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Tamaas. If not, see <http://www.gnu.org/licenses/>.
*
*/
/* -------------------------------------------------------------------------- */
#ifndef __LOOP_HH__
#define __LOOP_HH__
/* -------------------------------------------------------------------------- */
#include "loops/apply.hh"
#include "loops/loop_utils.hh"
#include "tamaas.hh"
#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>
#include <thrust/transform_reduce.h>
#include <thrust/tuple.h>
#include <type_traits>
__BEGIN_TAMAAS__
/// Detail trait: true only for the bare (unqualified) thrust policy types
template <typename T>
struct is_policy_helper : std::false_type {};
template <>
struct is_policy_helper<thrust::detail::host_t> : std::true_type {};
template <>
struct is_policy_helper<thrust::detail::device_t> : std::true_type {};

/// Trait identifying thrust execution policies regardless of cv/reference
/// qualification. The previous version enumerated `T`, `const T` and
/// `const T&` by hand and therefore missed non-const lvalue references and
/// rvalue references (which a forwarding reference such as `Functor&&` can
/// deduce); stripping qualifiers with std::decay covers all cases.
template <typename T>
struct is_policy : is_policy_helper<typename std::decay<T>::type> {};
/**
 * @brief Static utility class for automated loops using lambdas
 * This class is sweet candy :) It provides abstraction of the parallelism
 * paradigm used in loops and allows simple and less error-prone loop syntax,
 * with minimum boilerplate. I love it <3
 */
class Loop {
public:
  /// Backends enumeration
  enum backend {
    omp,  ///< [OpenMP](http://www.openmp.org/specifications/) backend
    cuda, ///< [Cuda](http://docs.nvidia.com/cuda/index.html) backend
  };

  /// Helper class to count iterations within lambda-loop.
  /// Mimics the container interface the loop methods rely on
  /// (begin / end / getNbComponents) so a counting range can be zipped
  /// together with real grids.
  template <typename T>
  class arange {
  public:
    using it_type = thrust::counting_iterator<T>;
    /// Construct a counting range over [0, size)
    arange(T size) : range_size(size) {}
    /// Iterator at count 0; the component-count argument is ignored
    /// (a counting range is scalar), kept for interface compatibility
    it_type begin(UInt i = 0) const { return it_type(T(0)); }
    /// Past-the-end iterator; argument ignored as in begin()
    it_type end(UInt i = 0) const { return it_type(range_size); }
    /// A counting range always has a single component
    UInt getNbComponents() const { return 1; }

  private:
    T range_size; ///< number of iterations produced by this range
  };

  /// Convenience factory for an arange of the given size
  template <typename T>
  static arange<T> range(T size) {
    return arange<T>(size);
  }

  /// Loop functor over any number of grids
  template <typename Functor, typename... Grids>
  static void loop(Functor&& func, Grids&&... containers);

  /// Strided loop over any number of grids with parallel policy
  template <typename DerivedPolicy, typename Functor, typename... Grids>
  static void stridedLoop(const thrust::execution_policy<DerivedPolicy>& policy,
                          Functor&& func, Grids&&... containers) {
    stridedLoopImpl(policy, std::forward<Functor>(func),
                    std::forward<Grids>(containers)...);
  }

  /// Strided loop over any number of grids (defaults to thrust::device).
  /// Disabled by SFINAE when the first argument is an execution policy,
  /// so the overload above is selected instead.
  template <typename Functor, typename... Grids>
  static typename std::enable_if<not is_policy<Functor>::value, void>::type
  stridedLoop(Functor&& func, Grids&&... containers) {
    stridedLoopImpl(thrust::device, std::forward<Functor>(func),
                    std::forward<Grids>(containers)...);
  }

private:
  /// Implementation shared by both stridedLoop overloads
  template <typename DerivedPolicy, typename Functor, typename... Grids>
  static void
  stridedLoopImpl(const thrust::execution_policy<DerivedPolicy>& policy,
                  Functor&& func, Grids&&... containers);

public:
  /// Reduce over any number of grids; `op` selects the reduction operation
  template <operation op, typename Functor, typename... Grids>
  static auto reduce(Functor&& func, Grids&&... containers)
      -> decltype(func(containers(0)...));

  /// Strided reduce over any number of grids
  template <operation op, typename Functor, typename... Grids>
  static auto stridedReduce(Functor&& func, Grids&&... containers)
      -> decltype(func(containers(0)...));

  /// Constructor deleted: the class only exposes static methods
  Loop() = delete;
};
/* -------------------------------------------------------------------------- */
/* Template implementation */
/* -------------------------------------------------------------------------- */
/// Apply `func` element-wise over all containers traversed in lock-step.
/// The containers' iterator pairs are zipped so that a single pass hands
/// the functor one element from each container per iteration.
template <typename Functor, typename... Grids>
void Loop::loop(Functor&& func, Grids&&... containers) {
  auto first =
      thrust::make_zip_iterator(thrust::make_tuple(containers.begin()...));
  auto last =
      thrust::make_zip_iterator(thrust::make_tuple(containers.end()...));
  thrust::for_each(first, last, detail::ApplyFunctor<Functor>(func));
#ifdef USE_CUDA
  // Device launches are asynchronous: wait until the kernel has finished
  cudaDeviceSynchronize();
#endif
}
/* -------------------------------------------------------------------------- */
/// Strided variant of loop(): each container is asked for iterators that
/// step by its own number of components, so the functor sees one logical
/// point (all components) per iteration instead of one scalar entry.
/// NOTE(review): the execution policy parameter is currently unused — the
/// default thrust backend is applied regardless; confirm intended.
template <typename DevicePolicy, typename Functor, typename... Grids>
void Loop::stridedLoopImpl(
    const thrust::execution_policy<DevicePolicy>& /*policy*/, Functor&& func,
    Grids&&... containers) {
  auto first = thrust::make_zip_iterator(
      thrust::make_tuple(containers.begin(containers.getNbComponents())...));
  auto last = thrust::make_zip_iterator(
      thrust::make_tuple(containers.end(containers.getNbComponents())...));
  thrust::for_each(first, last, detail::ApplyFunctor<Functor>(func));
#ifdef USE_CUDA
  // Device launches are asynchronous: wait until the kernel has finished
  cudaDeviceSynchronize();
#endif
}
/* -------------------------------------------------------------------------- */
/// Fold `func` applied element-wise over all containers into a single value,
/// combining partial results with the reduction operation selected by `op`.
/// Returns the reduced value (type deduced from the functor's result).
template <operation op, typename Functor, typename... Grids>
auto Loop::reduce(Functor&& func, Grids&&... containers)
    -> decltype(func(containers(0)...)) {
  using reduce_type = decltype(func(containers(0)...));
  using apply_type = detail::ApplyFunctor<Functor, reduce_type>;
  // Zip all containers so the functor receives one element from each
  auto first =
      thrust::make_zip_iterator(thrust::make_tuple(containers.begin()...));
  auto last =
      thrust::make_zip_iterator(thrust::make_tuple(containers.end()...));
  // The helper supplies both the neutral element and the binary operation
  auto helper = detail::reduction_helper<op, apply_type>(apply_type(func));
  auto result =
      thrust::reduce(first, last, helper.template init<reduce_type>(), helper);
#ifdef USE_CUDA
  // Device launches are asynchronous: wait until the kernel has finished
  cudaDeviceSynchronize();
#endif
  return result;
}
/* -------------------------------------------------------------------------- */
/// Strided variant of reduce(): containers are traversed one logical point
/// (all components) per iteration, and the functor's values are folded with
/// the reduction operation selected by `op`.
template <operation op, typename Functor, typename... Grids>
auto Loop::stridedReduce(Functor&& func, Grids&&... containers)
    -> decltype(func(containers(0)...)) {
  using reduce_type = decltype(func(containers(0)...));
  using apply_type = detail::ApplyFunctor<Functor, reduce_type>;
  // Each container provides iterators striding by its own component count
  auto first = thrust::make_zip_iterator(
      thrust::make_tuple(containers.begin(containers.getNbComponents())...));
  auto last = thrust::make_zip_iterator(
      thrust::make_tuple(containers.end(containers.getNbComponents())...));
  // The helper supplies both the neutral element and the binary operation
  auto helper = detail::reduction_helper<op, apply_type>(apply_type(func));
  auto result =
      thrust::reduce(first, last, helper.template init<reduce_type>(), helper);
#ifdef USE_CUDA
  // Device launches are asynchronous: wait until the kernel has finished
  cudaDeviceSynchronize();
#endif
  return result;
}
/* -------------------------------------------------------------------------- */
__END_TAMAAS__
#undef EXEC_CASE_MACRO
#undef REDUCE_CASE_MACRO
#endif // __LOOP_HH__
Event Timeline
Log In to Comment