diff --git a/SConstruct b/SConstruct index 1a4db53..3eb7a4d 100644 --- a/SConstruct +++ b/SConstruct @@ -1,240 +1,242 @@ from __future__ import print_function import os from os.path import join, abspath from version import write_info_file def detect_fftw(env): """Detect fftw on clusters""" fftw_include = "" fftw_library = "" # If FFTW is provided by module system (on clusters) if 'FFTW_ROOT' in env['ENV']: fftw_include = join(env['ENV']['FFTW_ROOT'], 'include') fftw_library = join(env['ENV']['FFTW_ROOT'], 'lib') # Setting up FFTW env['FFTW_LIBRARY_WISH'] = ['main', 'omp'] env['FFTW_INCLUDE_DIR'] = fftw_include env['FFTW_LIBRARY_DIR'] = fftw_library env.Tool(fftw) def detect_cuda(env): """Detect cuda on clusters""" if 'CUDA_ROOT' in env['ENV']: env['CUDA_TOOLKIT_PATH'] = env['ENV']['CUDA_ROOT'] else: env['CUDA_TOOLKIT_PATH'] = '/opt/cuda' env['CUDA_COMPONENTS'] = ['cufft'] env['CUDA_ARCH_FLAG'] = '-arch=sm_35' colors = env['COLOR_DICT'] if not env['verbose']: env['NVCCCOMSTR'] = u'{0}[Compiling (cuda)] {1}$SOURCE{2}'.format(colors['green'], colors['blue'], colors['end']) env['SHLINKCOMSTR'] = u'{0}[Linking (cuda)] {1}$TARGET{2}'.format(colors['purple'], colors['blue'], colors['end']) env.AppendUnique(CXXFLAGS="-expt-extended-lambda") # experimental lambda support env.AppendUnique(CXXFLAGS="-expt-relaxed-constexpr") # experimental lambda support + if env['build_type'] == 'debug': + env.AppendUnique(CXXFLAGS="-G") env.Tool('nvcc') def detect_boost(env): """Detect boost on clusters""" if 'BOOST_ROOT' in env['ENV']: env.AppendUnique(CPPPATH=[join(env['ENV']['BOOST_ROOT'], 'include')]) def gen_print(action_string, color_string, env): """Generic function for creating pretty compile output""" if env['verbose']: return None def print_fun(command, target, source, env): colors = env['COLOR_DICT'] print("{}[{}] {}{}{}".format(colors[color_string], action_string, colors['blue'], target[0], colors['end'])) return print_fun # Compilation colors colors = { 'cyan': '\033[96m', 'purple': '\033[95m', 'blue': '\033[94m', 'green': '\033[92m', 'yellow': '\033[93m', 'red': '\033[91m', 'end': '\033[0m' } # Inherit all environment variables (for CXX detection, etc.) main_env = Environment(ENV=os.environ) main_env['COLOR_DICT'] = colors # Compiler detection compiler_default = 'g++' if 'CXX' in os.environ: compiler_default = os.environ['CXX'] # Build variables vars = Variables('build-setup.conf') vars.Add(EnumVariable('build_type', 'Build type', 'release', allowed_values=('release', 'profiling', 'debug'), ignorecase=2)) vars.Add('prefix', 'Prefix where to install', '/usr/local') vars.Add('CXX', 'Compiler', compiler_default) vars.Add('py_exec', 'Python executable', 'python') vars.Add(BoolVariable('timer', 'Activate the timer possibilities', False)) vars.Add(BoolVariable('verbose', 'Activate verbosity', False)) vars.Add(BoolVariable('build_doc', 'Build documentation', False)) vars.Add(BoolVariable('color', 'Color the non-verbose compilation output', False)) vars.Add(BoolVariable('cuda', 'Activate the CUDA library', False)) vars.Update(main_env) Help(vars.GenerateHelpText(main_env)) # Save all options, not just those that differ from default with open('build-setup.conf', 'w') as setup: for key in vars.keys(): setup.write("{} = '{}'\n".format(key, main_env[key])) build_type = main_env['build_type'] build_dir = 'build-' + main_env['build_type'] print("Building in " + build_dir) verbose = main_env['verbose'] # Remove colors if not set if not main_env['color']: for key in colors: colors[key] = '' # Setting object suffix main_env['SHOBJSUFFIX'] = '.o' if not verbose: main_env['SHCXXCOMSTR'] = u'{0}[Compiling] {1}$SOURCE{2}'.format(colors['green'], colors['blue'], colors['end']) main_env['SHLINKCOMSTR'] = u'{0}[Linking] {1}$TARGET{2}'.format(colors['purple'], colors['blue'], colors['end']) main_env['SWIGCOMSTR'] = u'{0}[Swig] {1}$SOURCE{2}'.format(colors['yellow'], colors['blue'], colors['end']) # Include paths main_env.AppendUnique(CPPPATH=['#/src', '#/src/core', '#/src/bem', '#/src/surface', '#/src/python', '#/src/percolation', '#/src/model', '#/src/solvers', '#/src/gpu', '#/python']) # Changing the shared object extension main_env['SHOBJSUFFIX'] = '.o' # Treating Intel compiler for OpenMP if main_env['CXX'] != 'icpc': omp_libs = ['gomp'] omp_flag = '-fopenmp' else: omp_libs = [''] omp_flag = '-qopenmp' main_env.AppendUnique(LIBS=omp_libs) # main_env.AppendUnique(LINKFLAGS=[omp_flag]) # Flags and options main_env.AppendUnique(CXXFLAGS=['-std=c++11', '-Wall', omp_flag]) # Adding compile flags defined in evironment if 'CXXFLAGS' in os.environ: main_env.AppendUnique(CXXFLAGS=Split(os.environ['CXXFLAGS'])) if main_env['timer']: main_env.AppendUnique(CPPDEFINES=['USING_TIMER']) if build_type == 'debug': main_env.AppendUnique(CPPDEFINES=['TAMAAS_DEBUG']) if main_env['cuda']: main_env.AppendUnique(CPPDEFINES=['USE_CUDA']) # Compilation flags cxxflags_dict = { "debug": Split("-g -O0"), "profiling": Split("-g -pg -O2"), "release": Split("-O3") } # Link flags for shared libs shlinkflags_dict = { "debug": [], "profiling": ['-pg'], "release": [] } main_env.AppendUnique(CXXFLAGS=cxxflags_dict[build_type]) main_env.AppendUnique(SHLINKFLAGS=shlinkflags_dict[build_type]) main_env['LIBPATH'] = [abspath(join(build_dir, 'src'))] main_env['RPATH'] = "$LIBPATH" detect_fftw(main_env) detect_boost(main_env) # Activate cuda if needed if main_env['cuda']: detect_cuda(main_env) # Writing information file write_info_file("src/tamaas_info.cpp") # Saving the env file env_content = """export PYTHONPATH=$PYTHONPATH:{0}/python export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{0}/src """ def write_env_file(target, source, env): """Builder to write content to file""" with open(str(target[0]), 'w') as env_file: env_file.write(env_content.format(abspath(build_dir))) main_env['gen_print'] = gen_print env_file_env = main_env.Clone(PRINT_CMD_LINE_FUNC=gen_print("Writing", "cyan", main_env)) # Need to have a command and manage tamaas_environement.sh as target because # the build directory does not always exist env_file_env.Command(join(build_dir, 'tamaas_environement.sh'), None, write_env_file) Export('main_env') # Building subdirs def subdir(dir): SConscript(join(dir, 'SConscript'), variant_dir=join(build_dir, dir), duplicate=True) for dir in ['src', 'python', 'tests']: subdir(dir) # Building documentation if main_env['build_doc']: subdir('doc') diff --git a/src/core/loops/loop_utils.hh b/src/core/loops/loop_utils.hh index dde0acd..7742891 100644 --- a/src/core/loops/loop_utils.hh +++ b/src/core/loops/loop_utils.hh @@ -1,106 +1,106 @@ /** * @file * * @author Lucas Frérot * * @section LICENSE * * Copyright (©) 2017 EPFL (Ecole Polytechnique Fédérale de * Lausanne) Laboratory (LSMS - Laboratoire de Simulation en Mécanique des * Solides) * * Tamaas is free software: you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License as published by the Free * Software Foundation, either version 3 of the License, or (at your option) any * later version. * * Tamaas is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public License * along with Tamaas. If not, see . * */ /* -------------------------------------------------------------------------- */ #ifndef __LOOP_UTILS_HH__ #define __LOOP_UTILS_HH__ /* -------------------------------------------------------------------------- */ #include "tamaas.hh" #include "loop.hh" #include #include /* -------------------------------------------------------------------------- */ __BEGIN_TAMAAS__ namespace detail { template -constexpr UInt loopSize(Grids&&... grids) { +UInt loopSize(Grids&&... grids) { return (only_points)? std::get<0>(std::forward_as_tuple(grids...)).getNbPoints(): std::get<0>(std::forward_as_tuple(grids...)).dataSize(); } template struct reduction_helper; template <> struct reduction_helper { template static constexpr T init() { return T(0); } template static void reduce(T& res, T val) { res += val; } }; template <> struct reduction_helper { template static constexpr T init() { return T(1); } template static void reduce(T& res, T val) { res *= val; } }; template <> struct reduction_helper { template static constexpr T init() { return std::numeric_limits::max(); } template static void reduce(T& res, T val) { res = std::min(res, val); } }; template <> struct reduction_helper { template static constexpr T init() { return std::numeric_limits::lowest(); } template static void reduce(T& res, T val) { res = std::max(res, val); } }; } __END_TAMAAS__ #endif // __LOOP_UTILS_HH diff --git a/src/core/tamaas.cpp b/src/core/tamaas.cpp index 705ab11..da5c1f4 100644 --- a/src/core/tamaas.cpp +++ b/src/core/tamaas.cpp @@ -1,58 +1,58 @@ /** * @file * * @author Lucas Frérot * * @section LICENSE * * Copyright (©) 2016 EPFL (Ecole Polytechnique Fédérale de * Lausanne) Laboratory (LSMS - Laboratoire de Simulation en Mécanique des * Solides) * * Tamaas is free software: you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License as published by the Free * Software Foundation, either version 3 of the License, or (at your option) any * later version. * * Tamaas is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public License * along with Tamaas. If not, see . * */ /* -------------------------------------------------------------------------- */ #include #include #include "tamaas.hh" #include "fft_plan_manager.hh" #include "loop.hh" /* -------------------------------------------------------------------------- */ __BEGIN_TAMAAS__ void initialize(UInt num_threads) { fftw_init_threads(); if (num_threads) omp_set_num_threads(num_threads); fftw_plan_with_nthreads(omp_get_max_threads()); #ifdef USE_CUDA - Loop::init(Loop::omp); // for now + Loop::init(Loop::cuda); // for now #else Loop::init(Loop::omp); #endif } /* -------------------------------------------------------------------------- */ void finalize() { FFTPlanManager::get().clean(); fftw_cleanup_threads(); } __END_TAMAAS__ diff --git a/src/gpu/loops/loop_cuda.hh b/src/gpu/loops/loop_cuda.hh index 919926b..c8704ed 100644 --- a/src/gpu/loops/loop_cuda.hh +++ b/src/gpu/loops/loop_cuda.hh @@ -1,75 +1,81 @@ /** * @file * * @author Lucas Frérot * * @section LICENSE * * Copyright (©) 2017 EPFL (Ecole Polytechnique Fédérale de * Lausanne) Laboratory (LSMS - Laboratoire de Simulation en Mécanique des * Solides) * * Tamaas is free software: you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License as published by the Free * Software Foundation, either version 3 of the License, or (at your option) any * later version. * * Tamaas is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public License * along with Tamaas. If not, see . * */ /* -------------------------------------------------------------------------- */ #ifndef __LOOP_CUDA_HH__ #define __LOOP_CUDA_HH__ /* -------------------------------------------------------------------------- */ #include "tamaas.hh" #include "loops/loop_utils.hh" #include __BEGIN_TAMAAS__ namespace detail { #define GRID_STRIDED_LOOP(operation, size) do { \ const UInt index = blockIdx.x * blockDim.x + threadIdx.x; \ const UInt stride = blockDim.x * gridDim.x; \ for (UInt i = index ; i < (size) ; i += stride) { \ (operation); \ }} while(0) \ template __global__ void exec_loop_kernel(Functor& func, UInt size, T... data_pointers) { - GRID_STRIDED_LOOP(func(data_pointers[i]...), size); + //GRID_STRIDED_LOOP(func(data_pointers[i]...), size); + UInt index = blockIdx.x * blockDim.x + threadIdx.x; + UInt stride = blockDim.x * gridDim.x; + for (UInt i = index ; i < size ; i += stride) { + func(data_pointers[i]...); + } } template void exec_loop_cuda(Functor&& func, Grids&&... grids) { Int device = 0, threads_per_block = 0; cudaGetDevice(&device); cudaDeviceGetAttribute(&threads_per_block, cudaDevAttrMaxThreadsPerBlock, device); const UInt loop_size = loopSize(grids...); UInt blocks = loop_size / threads_per_block; blocks += (threads_per_block * blocks == loop_size)? 0 : 1; // make sure we cover all data - exec_loop_kernel<<>>(func, loop_size, grids.getInternalData()...); + //exec_loop_kernel<<>>(func, loop_size, grids.getInternalData()...); + exec_loop_kernel<<<1, 1>>>(func, loop_size, grids.getInternalData()...); } template auto reduce_loop_cuda(Functor&& func, Grids&&... grids) -> decltype(func(grids(0)...)) { return func(grids(0)...); } #undef GRID_STRIDED_LOOP } // namespace detail __END_TAMAAS__ #endif // __LOOP_CUDA_HH__ diff --git a/tests/SConscript b/tests/SConscript index 005d35f..90c4048 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -1,50 +1,51 @@ from __future__ import print_function from os.path import join, abspath Import('main_env') print("Environment for tests") test_env = main_env.Clone( PRINT_CMD_LINE_FUNC=main_env['gen_print']("Copying", "red", main_env)) test_files = Split(""" run_tests.sh test_hertz_pressure.py test_westergaard.py test_patch_westergaard.py test_surface.py test_autocorrelation.py test_hertz_disp.py test_hertz_kato.py test_hertz_adhesion.py test_saturated_pressure.py test_fftransform.py test_bem_grid.py test_flood_fill.py """) src_dir = "#/tests" build_dir = 'build-' + main_env['build_type'] + '/tests' for file in test_files: source = join(src_dir, file) test_env.Command(file, source, Copy("$TARGET", "$SOURCE")) crit_env = main_env.Clone(tools=[criterion]) if 'SHCXXCOMSTR' in main_env: crit_env['CXXCOMSTR'] = main_env['SHCXXCOMSTR'] if 'SHLINKCOMSTR' in main_env: crit_env['LINKCOMSTR'] = main_env['SHLINKCOMSTR'] crit_env.AppendUnique(LIBS=['Tamaas'], LIBPATH=[abspath('build-' + main_env['build_type'] + '/src')]) cpp_test_files = Split(""" test_grid.cpp test_loop.cpp +test_cuda.cpp """) for file in cpp_test_files: crit_env.Program(file) diff --git a/tests/test_cuda.cpp b/tests/test_cuda.cpp new file mode 100644 index 0000000..3fc3c6e --- /dev/null +++ b/tests/test_cuda.cpp @@ -0,0 +1,15 @@ +#include "tamaas.hh" +#include "grid.hh" + +using namespace tamaas; + +int main() { + initialize(); + Grid grid({20}, 1); + Loop::loop([] CUDA_LAMBDA (Real & x) { + x = 1; + }, grid); + std::cout << grid << std::endl; + finalize(); + return 0; +}