diff --git a/SConstruct b/SConstruct index 4487c48..b1dad1e 100644 --- a/SConstruct +++ b/SConstruct @@ -1,258 +1,258 @@ from __future__ import print_function import os from os.path import join, abspath from version import write_info_file def detect_fftw(env): """Detect fftw on clusters""" fftw_include = "" fftw_library = "" # If FFTW is provided by module system (on clusters) if 'FFTW_ROOT' in env['ENV']: fftw_include = join(env['ENV']['FFTW_ROOT'], 'include') fftw_library = join(env['ENV']['FFTW_ROOT'], 'lib') # Setting up FFTW env['FFTW_LIBRARY_WISH'] = ['main', 'omp'] env['FFTW_INCLUDE_DIR'] = fftw_include env['FFTW_LIBRARY_DIR'] = fftw_library env.Tool(fftw) def detect_cuda(env): """Detect cuda on clusters""" if 'CUDA_ROOT' in env['ENV']: env['CUDA_TOOLKIT_PATH'] = env['ENV']['CUDA_ROOT'] else: env['CUDA_TOOLKIT_PATH'] = '/opt/cuda' env['CUDA_COMPONENTS'] = ['cufft'] env['CUDA_ARCH_FLAG'] = '-arch=sm_35' colors = env['COLOR_DICT'] if not env['verbose']: env['NVCCCOMSTR'] = u'{0}[Compiling (cuda)] {1}$SOURCE{2}'.format(colors['green'], colors['blue'], colors['end']) env['SHLINKCOMSTR'] = u'{0}[Linking (cuda)] {1}$TARGET{2}'.format(colors['purple'], colors['blue'], colors['end']) env.AppendUnique(CXXFLAGS="-expt-extended-lambda") # experimental lambda support env.AppendUnique(CXXFLAGS="-expt-relaxed-constexpr") # experimental lambda support if env['build_type'] == 'debug': env.AppendUnique(CXXFLAGS="-G") env.Tool('nvcc') def detect_boost(env): """Detect boost on clusters""" if 'BOOST_ROOT' in env['ENV']: env['BOOST_INCLUDE_DIR'] = join(env['ENV']['BOOST_ROOT'], 'include') env.Tool(boost) def detect_thrust(env): """Detect cuda on clusters""" if 'CUDA_ROOT' in env['ENV']: - env['THRUST_INCLUDE_DIR'] = env['ENV']['CUDA_ROOT'] + env['THRUST_INCLUDE_DIR'] = join(env['ENV']['CUDA_ROOT'], 'include') else: env['THRUST_INCLUDE_DIR'] = '/opt/cuda/include' env.Tool(thrust) def gen_print(action_string, color_string, env): """Generic function for creating pretty compile 
output""" if env['verbose']: return None def print_fun(command, target, source, env): colors = env['COLOR_DICT'] print("{}[{}] {}{}{}".format(colors[color_string], action_string, colors['blue'], target[0], colors['end'])) return print_fun # Compilation colors colors = { 'cyan': '\033[96m', 'purple': '\033[95m', 'blue': '\033[94m', 'green': '\033[92m', 'yellow': '\033[93m', 'red': '\033[91m', 'end': '\033[0m' } # Inherit all environment variables (for CXX detection, etc.) main_env = Environment(ENV=os.environ) main_env['COLOR_DICT'] = colors # Compiler detection compiler_default = 'g++' if 'CXX' in os.environ: compiler_default = os.environ['CXX'] # Build variables vars = Variables('build-setup.conf') vars.Add(EnumVariable('build_type', 'Build type', 'release', allowed_values=('release', 'profiling', 'debug'), ignorecase=2)) vars.Add(EnumVariable('backend', 'Thrust backend', 'omp', allowed_values=('omp', 'cuda'), ignorecase=2)) vars.Add('prefix', 'Prefix where to install', '/usr/local') vars.Add('CXX', 'Compiler', compiler_default) vars.Add('py_exec', 'Python executable', 'python') vars.Add(BoolVariable('timer', 'Activate the timer possibilities', False)) vars.Add(BoolVariable('verbose', 'Activate verbosity', False)) vars.Add(BoolVariable('build_doc', 'Build documentation', False)) vars.Add(BoolVariable('color', 'Color the non-verbose compilation output', False)) vars.Update(main_env) Help(vars.GenerateHelpText(main_env)) # Save all options, not just those that differ from default with open('build-setup.conf', 'w') as setup: for key in vars.keys(): setup.write("{} = '{}'\n".format(key, main_env[key])) build_type = main_env['build_type'] build_dir = 'build-' + main_env['build_type'] print("Building in " + build_dir) verbose = main_env['verbose'] # Remove colors if not set if not main_env['color']: for key in colors: colors[key] = '' # Setting object suffix main_env['SHOBJSUFFIX'] = '.o' if not verbose: main_env['SHCXXCOMSTR'] = u'{0}[Compiling] 
{1}$SOURCE{2}'.format(colors['green'], colors['blue'], colors['end']) main_env['SHLINKCOMSTR'] = u'{0}[Linking] {1}$TARGET{2}'.format(colors['purple'], colors['blue'], colors['end']) main_env['SWIGCOMSTR'] = u'{0}[Swig] {1}$SOURCE{2}'.format(colors['yellow'], colors['blue'], colors['end']) # Include paths main_env.AppendUnique(CPPPATH=['#/src', '#/src/core', '#/src/bem', '#/src/surface', '#/src/python', '#/src/percolation', '#/src/model', '#/src/solvers', '#/src/gpu', '#/python']) # Changing the shared object extension main_env['SHOBJSUFFIX'] = '.o' # Treating Intel compiler for OpenMP if main_env['CXX'] != 'icpc': omp_libs = ['gomp'] omp_flag = '-fopenmp' else: omp_libs = [''] omp_flag = '-qopenmp' main_env.AppendUnique(LIBS=omp_libs) # main_env.AppendUnique(LINKFLAGS=[omp_flag]) # Flags and options main_env.AppendUnique(CXXFLAGS=['-std=c++11', '-Wall', omp_flag]) if main_env['backend'] == 'omp': main_env.AppendUnique(CPPDEFINES=["THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP"]) elif main_env['backend'] == 'cuda': main_env.AppendUnique(CPPDEFINES=["THRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CUDA"]) main_env.AppendUnique(CPPDEFINES=['USE_CUDA']) # Adding compile flags defined in evironment if 'CXXFLAGS' in os.environ: main_env.AppendUnique(CXXFLAGS=Split(os.environ['CXXFLAGS'])) if main_env['timer']: main_env.AppendUnique(CPPDEFINES=['USING_TIMER']) if build_type == 'debug': main_env.AppendUnique(CPPDEFINES=['TAMAAS_DEBUG']) # Compilation flags cxxflags_dict = { "debug": Split("-g -O0"), "profiling": Split("-g -pg -O2"), "release": Split("-O3") } # Link flags for shared libs shlinkflags_dict = { "debug": [], "profiling": ['-pg'], "release": [] } main_env.AppendUnique(CXXFLAGS=cxxflags_dict[build_type]) main_env.AppendUnique(SHLINKFLAGS=shlinkflags_dict[build_type]) main_env['LIBPATH'] = [abspath(join(build_dir, 'src'))] main_env['RPATH'] = "$LIBPATH" detect_fftw(main_env) detect_boost(main_env) detect_thrust(main_env) # Activate cuda if needed if main_env['backend'] 
== 'cuda': detect_cuda(main_env) # Writing information file write_info_file("src/tamaas_info.cpp") # Saving the env file env_content = """export PYTHONPATH=$PYTHONPATH:{0}/python export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{0}/src """ def write_env_file(target, source, env): """Builder to write content to file""" with open(str(target[0]), 'w') as env_file: env_file.write(env_content.format(abspath(build_dir))) main_env['gen_print'] = gen_print env_file_env = main_env.Clone(PRINT_CMD_LINE_FUNC=gen_print("Writing", "cyan", main_env)) # Need to have a command and manage tamaas_environement.sh as target because # the build directory does not always exist env_file_env.Command(join(build_dir, 'tamaas_environement.sh'), None, write_env_file) Export('main_env') # Building subdirs def subdir(dir): SConscript(join(dir, 'SConscript'), variant_dir=join(build_dir, dir), duplicate=True) for dir in ['src', 'python', 'tests']: subdir(dir) # Building documentation if main_env['build_doc']: subdir('doc') diff --git a/src/core/loop.hh b/src/core/loop.hh index a77fbc0..3ec40b8 100644 --- a/src/core/loop.hh +++ b/src/core/loop.hh @@ -1,156 +1,171 @@ /** * @file * * @author Lucas Frérot * * @section LICENSE * * Copyright (©) 2017 EPFL (Ecole Polytechnique Fédérale de * Lausanne) Laboratory (LSMS - Laboratoire de Simulation en Mécanique des * Solides) * * Tamaas is free software: you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License as published by the Free * Software Foundation, either version 3 of the License, or (at your option) any * later version. * * Tamaas is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public License * along with Tamaas. If not, see . 
*
 */
/* -------------------------------------------------------------------------- */
#ifndef __LOOP_HH__
#define __LOOP_HH__
/* -------------------------------------------------------------------------- */
#include "tamaas.hh"
#include "loops/apply.hh"
#include "loops/loop_utils.hh"
// NOTE(review): the six include directives below carry no header name in this
// copy of the file; the targets appear to have been lost and have to be
// restored from the upstream source.
#include
#include
#include
#include
#include
#include

__BEGIN_TAMAAS__

/**
 * @brief Static helper class for automated loops using lambdas
 * This class is sweet candy :) It provides abstraction of the parallelism
 * paradigm used in loops and allows simple and less error-prone loop syntax,
 * with minimum boiler plate. I love it <3
 *
 * The default constructor is deleted and every loop function declared here is
 * static, so the class is used through Loop::loop(...), Loop::reduce(...),
 * etc. and is not instantiated via its default constructor.
 *
 * NOTE(review): the template parameter lists after each `template` keyword
 * are missing in this copy of the file (here and in the implementations
 * below) -- restore them from the upstream source.
 */
class Loop {
public:
  /// Backends enumeration
  enum backend {
    omp,  ///< [OpenMP](http://www.openmp.org/specifications/) backend
    cuda, ///< [Cuda](http://docs.nvidia.com/cuda/index.html) backend
  };

  /// Helper class to count iterations within lambda-loop:
  /// begin() is a counting iterator at T(0), end() one at the given size
  template
  class arange {
  public:
    using it_type = thrust::counting_iterator;
    /// Store the size of the range
    arange(T size):range_size(size) {}
    /// Iterator positioned at T(0)
    it_type begin() const { return it_type(T(0)); }
    /// Iterator positioned at the stored range size
    it_type end() const { return it_type(range_size); }
  private:
    T range_size; ///< value used to build the end() iterator
  };

  /// Loop functor over any number of grids
  template
  static void loop(Functor&& func, Grids&&... containers);

  /// Strided loop over any number of grids
  template
  static void stridedLoop(Functor&& func, Grids&&... containers);

  /// Reduce over any number of grids; the return type is the type of
  /// func(containers(0)...)
  template
  static auto reduce(Functor&& func, Grids&&... containers) -> decltype(func(containers(0)...));

  /// Strided reduce over any number of grids; the return type is the type of
  /// func(containers(0)...)
  template
  static auto stridedReduce(Functor&& func, Grids&&... containers) -> decltype(func(containers(0)...));

  /// Constructor (deleted)
  Loop() = delete;
};

/* -------------------------------------------------------------------------- */
/* Template implementation */
/* -------------------------------------------------------------------------- */

/**
 * Zip the begin()/end() iterators of all containers and traverse the zipped
 * range with thrust::for_each, func being wrapped in detail::ApplyFunctor.
 * When USE_CUDA is defined, cudaDeviceSynchronize() is called before
 * returning; its return value is not checked.
 */
template
void Loop::loop(Functor&& func, Grids&&... containers) {
  auto begin = thrust::make_zip_iterator(thrust::make_tuple(containers.begin()...));
  auto end = thrust::make_zip_iterator(thrust::make_tuple(containers.end()...));
  thrust::for_each(begin, end, detail::ApplyFunctor(func));
+
+#ifdef USE_CUDA
+  cudaDeviceSynchronize();
+#endif
}

/* -------------------------------------------------------------------------- */

/**
 * Same as Loop::loop, except that the iterators are requested with
 * begin(n)/end(n) where n is the container's getNbComponents().
 * (presumably this visits one entry per group of n components -- confirm
 * against the container's begin/end overloads)
 */
template
void Loop::stridedLoop(Functor&& func, Grids&&... containers) {
  auto begin = thrust::make_zip_iterator(thrust::make_tuple(containers.begin(containers.getNbComponents())...));
  auto end = thrust::make_zip_iterator(thrust::make_tuple(containers.end(containers.getNbComponents())...));
  thrust::for_each(begin, end, detail::ApplyFunctor(func));
+#ifdef USE_CUDA
+  cudaDeviceSynchronize();
+#endif
}

/* -------------------------------------------------------------------------- */

/**
 * Zip the begin()/end() iterators of all containers and reduce the zipped
 * range with thrust::reduce. The binary operation is a
 * detail::reduction_helper built from func wrapped in detail::ApplyFunctor,
 * and the initial value comes from the helper's init().
 * When USE_CUDA is defined, cudaDeviceSynchronize() is called after the
 * reduction and before the result is returned.
 */
template
auto Loop::reduce(Functor&& func, Grids&&... containers) -> decltype(func(containers(0)...)) {
  auto begin = thrust::make_zip_iterator(thrust::make_tuple(containers.begin()...));
  auto end = thrust::make_zip_iterator(thrust::make_tuple(containers.end()...));
  // Type produced by one application of func
  using reduce_type = decltype(func(containers(0)...));
  using apply_type = detail::ApplyFunctor;
  auto red_helper = detail::reduction_helper(apply_type(func));
+ auto result = thrust::reduce(begin, end, red_helper.template init(), red_helper);
- return thrust::reduce(begin, end, red_helper.template init(), red_helper);
+#ifdef USE_CUDA
+ cudaDeviceSynchronize();
+#endif
+ return result;
}

/* -------------------------------------------------------------------------- */

/**
 * Same as Loop::reduce, except that the iterators are requested with
 * begin(n)/end(n) where n is the container's getNbComponents().
 */
template
auto Loop::stridedReduce(Functor&& func, Grids&&... containers) -> decltype(func(containers(0)...)) {
  auto begin = thrust::make_zip_iterator(thrust::make_tuple(containers.begin(containers.getNbComponents())...));
  auto end = thrust::make_zip_iterator(thrust::make_tuple(containers.end(containers.getNbComponents())...));
  // Type produced by one application of func
  using reduce_type = decltype(func(containers(0)...));
  using apply_type = detail::ApplyFunctor;
  auto red_helper = detail::reduction_helper(apply_type(func));
- return thrust::reduce(begin, end, red_helper.template init(), red_helper);
+ auto result = thrust::reduce(begin, end, red_helper.template init(), red_helper);
+#ifdef USE_CUDA
+ cudaDeviceSynchronize();
+#endif
+ return result;
}

/* -------------------------------------------------------------------------- */

__END_TAMAAS__

// NOTE(review): these two macros are not defined in the visible part of this
// header; presumably they come from an included file or are leftovers.
#undef EXEC_CASE_MACRO
#undef REDUCE_CASE_MACRO

#endif // __LOOP_HH__
diff --git a/src/core/loops/apply.hh b/src/core/loops/apply.hh
index a73b0ed..aa4440e 100644
--- a/src/core/loops/apply.hh
+++ b/src/core/loops/apply.hh
@@ -1,147 +1,149 @@
/**
 * @file
 *
 * @author Lucas Frérot
 *
 * @section LICENSE
 *
 * Copyright (©) 2017 EPFL (Ecole Polytechnique Fédérale de
 * Lausanne) Laboratory (LSMS - Laboratoire de Simulation en Mécanique des
 * Solides)
 *
 * Tamaas is free software: you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or (at your option) any
 * later version.
 *
 * Tamaas is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Tamaas. If not, see .
*
 */
/* -------------------------------------------------------------------------- */
#ifndef __APPLY_HH__
#define __APPLY_HH__
/* -------------------------------------------------------------------------- */
#include "tamaas.hh"
// NOTE(review): the three include directives below carry no header name in
// this copy of the file; restore the targets from the upstream source.
#include
#include
#include
/* -------------------------------------------------------------------------- */

__BEGIN_TAMAAS__

namespace detail {

/// Helper function for application of a functor on a thrust::tuple.
/// Each specialization Apply<N> below provides a static apply(func, t) that
/// calls func with the elements 0 .. N-1 of t, extracted with thrust::get.
/// NOTE(review): the template parameter lists after the bare `template`
/// keywords and the arguments of std::forward are missing in this copy of
/// the file -- restore them from the upstream source.
template
struct Apply;

/// No tuple element: func is called without argument, t is unused
template <>
struct Apply<0> {
  template
  __host__ __device__
  static auto apply(Functor&& func, Tuple&& t [[gnu::unused]]) -> decltype(func()) {
    return func();
  }
};

/// One tuple element forwarded to func
template <>
struct Apply<1> {
  template
  __host__ __device__
  static auto apply(Functor&& func, Tuple&& t) -> decltype(func(thrust::get<0>(std::forward(t)))) {
    return func(thrust::get<0>(std::forward(t)));
  }
};

/// Two tuple elements forwarded to func
template <>
struct Apply<2> {
  template
  __host__ __device__
  static auto apply(Functor&& func, Tuple&& t) -> decltype(func(thrust::get<0>(std::forward(t)), thrust::get<1>(std::forward(t)))) {
    return func(thrust::get<0>(std::forward(t)), thrust::get<1>(std::forward(t)));
  }
};

/// Three tuple elements forwarded to func
template <>
struct Apply<3> {
  template
  __host__ __device__
  static auto apply(Functor&& func, Tuple&& t) -> decltype(func(thrust::get<0>(std::forward(t)), thrust::get<1>(std::forward(t)), thrust::get<2>(std::forward(t)))) {
    return func(thrust::get<0>(std::forward(t)), thrust::get<1>(std::forward(t)), thrust::get<2>(std::forward(t)));
  }
};

/// Four tuple elements forwarded to func
template <>
struct Apply<4> {
  template
  __host__ __device__
  static auto apply(Functor&& func, Tuple&& t) -> decltype(func(thrust::get<0>(std::forward(t)), thrust::get<1>(std::forward(t)), thrust::get<2>(std::forward(t)), thrust::get<3>(std::forward(t)))) {
    return func(thrust::get<0>(std::forward(t)), thrust::get<1>(std::forward(t)), thrust::get<2>(std::forward(t)), thrust::get<3>(std::forward(t)));
  }
};

/// Five tuple elements forwarded to func (largest arity specialized here)
template <>
struct Apply<5> {
  template
  __host__ __device__
  static auto apply(Functor&& func, Tuple&& t) -> decltype(func(thrust::get<0>(std::forward(t)), thrust::get<1>(std::forward(t)), thrust::get<2>(std::forward(t)), thrust::get<3>(std::forward(t)), thrust::get<4>(std::forward(t)))) {
    return func(thrust::get<0>(std::forward(t)), thrust::get<1>(std::forward(t)), thrust::get<2>(std::forward(t)), thrust::get<3>(std::forward(t)), thrust::get<4>(std::forward(t)));
  }
};

/// Helper class for functor application in thrust: wraps a functor so that it
/// can be invoked with a single tuple argument, the call being dispatched to
/// Apply<...>::apply.
template
class ApplyFunctor {
public:
  /// Wrap the given functor (only a reference to it is stored)
+ __host__ __device__ ApplyFunctor(const Functor & functor):functor(functor) {}
  /// Copy constructor: the copy refers to the same functor as o
+ __host__ __device__ ApplyFunctor(const ApplyFunctor & o):functor(o.functor) {}
  /// Call the wrapped functor with the elements of tuple t
  template
  __host__ __device__
  ret_type operator()(Tuple&& t) const {
    return Apply::type>::value>::
      apply(functor, std::forward(t));
  }
private:
  // NOTE(review): the functor is held by reference, so the referenced object
  // must outlive every copy of this wrapper -- confirm this holds when the
  // wrapper is handed to a device backend.
  const Functor & functor;
};

} // namespace detail

__END_TAMAAS__

#endif // __APPLY_HH__