diff --git a/SConstruct b/SConstruct index 74268c2..f18588c 100644 --- a/SConstruct +++ b/SConstruct @@ -1,478 +1,480 @@ # -*- mode:python; coding: utf-8 -*- # vim: set ft=python: # @file # LICENSE # # Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), # Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published # by the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . # ------------------------------------------------------------------------------ # Imports # ------------------------------------------------------------------------------ from __future__ import print_function import sys import os from subprocess import check_output # Import below not strictly necessary, but good for pep8 from SCons.Script import ( EnsurePythonVersion, EnsureSConsVersion, Help, Environment, Variables, EnumVariable, PathVariable, BoolVariable, ListVariable, Split, Export, Dir, ) from SCons.Errors import StopError from SCons import __version__ as scons_version from version import get_git_subst from detect import ( FindFFTW, FindBoost, FindThrust, FindCuda, FindExpolit, FindPybind11 ) from INFOS import TAMAAS_INFOS # ------------------------------------------------------------------------------ EnsurePythonVersion(2, 7) EnsureSConsVersion(2, 4) # ------------------------------------------------------------------------------ def detect_dependencies(env): "Detect all dependencies" fftw_comp = { 
'omp': ['omp'], 'threads': ['threads'], 'none': [], } fftw_components = fftw_comp[env['fftw_threads']] if main_env['use_mpi']: fftw_components.append('mpi') FindFFTW(env, fftw_components, precision=env['real_type']) if main_env['backend'] == 'cuda': FindCuda(env) FindBoost(env, ['boost/preprocessor/seq.hpp']) FindExpolit(env) # Use thrust shipped with cuda if cuda is requested thrust_var = 'CUDA_ROOT' if 'CUDA_ROOT' in env['ENV'] else 'THRUST_ROOT' FindThrust(env, env['backend'], thrust_var) if env['build_python']: FindPybind11(env) def subdir(env, dir): "Building a sub-directory" return env.SConscript(env.File('SConscript', dir), variant_dir=env.Dir(dir, env['build_dir']), duplicate=True) def print_build_info(env): info = ("-- Tamaas ${version}\n" + "-- SCons {} (Python {}.{})\n".format(scons_version, sys.version_info.major, sys.version_info.minor) + "-- Build type: ${build_type}\n" + "-- Thrust backend: ${backend}\n" + "-- FFTW threads: ${fftw_threads}\n" + "-- MPI: ${use_mpi}\n" + "-- Build directory: ${build_dir}\n" + "-- Python version (bindings): $py_version") print(env.subst(info)) # ------------------------------------------------------------------------------ # Main compilation # ------------------------------------------------------------------------------ # Compilation colors colors = { 'cyan': '\033[96m', 'purple': '\033[95m', 'blue': '\033[94m', 'green': '\033[92m', 'yellow': '\033[93m', 'gray': '\033[38;5;8m', 'orange': '\033[38;5;208m', 'red': '\033[91m', 'end': '\033[0m' } # Inherit all environment variables (for CXX detection, etc.) 
main_env = Environment( ENV=os.environ, ) # Set tamaas information for k, v in TAMAAS_INFOS._asdict().items(): main_env[k] = v main_env['COLOR_DICT'] = colors main_env.AddMethod(subdir, 'SubDirectory') # Build variables vars = Variables('build-setup.conf') vars.AddVariables( EnumVariable('build_type', 'Build type', 'release', allowed_values=('release', 'profiling', 'debug'), ignorecase=2), EnumVariable('backend', 'Thrust backend', 'omp', allowed_values=('cpp', 'omp', 'tbb', 'cuda'), ignorecase=2), EnumVariable('fftw_threads', 'Threads FFTW library preference', 'omp', allowed_values=('omp', 'threads', 'none'), ignorecase=2), EnumVariable('sanitizer', 'Sanitizer type', 'none', allowed_values=('none', 'memory', 'leaks', 'address'), ignorecase=2), PathVariable('prefix', 'Prefix where to install', '/usr/local'), # Dependencies paths PathVariable('FFTW_ROOT', 'FFTW custom path', os.getenv('FFTW_ROOT', ''), PathVariable.PathAccept), PathVariable('THRUST_ROOT', 'Thrust custom path', os.getenv('THRUST_ROOT', ''), PathVariable.PathAccept), PathVariable('BOOST_ROOT', 'Boost custom path', os.getenv('BOOST_ROOT', ''), PathVariable.PathAccept), PathVariable('CUDA_ROOT', 'Cuda custom path', os.getenv('CUDA_ROOT', ''), PathVariable.PathAccept), # Dependencies provided as submodule get different default PathVariable('GTEST_ROOT', 'Googletest custom path', os.getenv('GTEST_ROOT', '#third-party/googletest/googletest'), PathVariable.PathAccept), PathVariable('PYBIND11_ROOT', 'Pybind11 custom path', os.getenv('PYBIND11_ROOT', '#third-party/pybind11/include'), PathVariable.PathAccept), PathVariable('EXPOLIT_ROOT', 'Expolit custom path', os.getenv('EXPOLIT_ROOT', '#third-party/expolit/include'), PathVariable.PathAccept), # Executables ('CXX', 'Compiler', os.getenv('CXX', 'g++')), ('MPICXX', 'MPI Compiler wrapper', os.getenv('MPICXX', 'mpicxx')), ('py_exec', 'Python executable', 'python3'), # Compiler flags ('CXXFLAGS', 'C++ compiler flags', os.getenv('CXXFLAGS', "")), # Cosmetic 
BoolVariable('verbose', 'Activate verbosity', False), BoolVariable('color', 'Color the non-verbose compilation output', False), # Tamaas components BoolVariable('build_doc', 'Build documentation', False), BoolVariable('build_tests', 'Build test suite', False), BoolVariable('build_python', 'Build python wrapper', True), # Documentation ListVariable('doc_builders', 'Generated documentation formats', default='html', names=Split("html man")), # TODO include latex # Dependencies BoolVariable('use_googletest', 'Build tests using GTest', False), BoolVariable('use_mpi', 'Builds multi-process parallelism', False), # Distribution options BoolVariable('strip_info', 'Strip binary of added information', False), BoolVariable('build_static_lib', "Build a static libTamaas", False), # Type variables EnumVariable('real_type', 'Type for real precision variables', 'double', allowed_values=('double', 'long double')), EnumVariable('integer_type', 'Type for integer variables', 'int', allowed_values=('int', 'long')), ) # Set variables of environment vars.Update(main_env) help_text = vars.GenerateHelpText(main_env) help_text += """ Commands: scons [build] [options]... Compile Tamaas (and additional modules/tests) scons install [prefix=/your/prefix] [options]... 
Install Tamaas to prefix scons dev Install symlink to Tamaas python module (useful to development purposes) scons test Run tests with pytest scons doc Compile documentation with Doxygen and Sphinx+Breathe scons archive Create a gzipped archive from source """ # noqa Help(help_text) # Save all options, not just those that differ from default with open('build-setup.conf', 'w') as setup: for option in vars.options: setup.write("# " + option.help.replace('\n', '\n# ') + "\n") setup.write("{} = '{}'\n".format(option.key, main_env[option.key])) main_env['should_configure'] = \ not main_env.GetOption('clean') and not main_env.GetOption('help') build_type = main_env['build_type'] build_dir = 'build-${build_type}' main_env['build_dir'] = main_env.Dir(build_dir) # Setting up the python name with version if main_env['build_python']: args = (main_env.subst("${py_exec} -c").split() + ["from distutils.sysconfig import get_python_version;" "print(get_python_version())"]) main_env['py_version'] = bytes(check_output(args)).decode() # Printing some build infos if main_env['should_configure']: print_build_info(main_env) verbose = main_env['verbose'] # Remove colors if not set if not main_env['color']: for key in colors: colors[key] = '' if not verbose: main_env['CXXCOMSTR'] = main_env['SHCXXCOMSTR'] = \ u'{0}[Compiling ($SHCXX)] {1}$SOURCE'.format(colors['green'], colors['end']) main_env['LINKCOMSTR'] = main_env['SHLINKCOMSTR'] = \ u'{0}[Linking] {1}$TARGET'.format(colors['purple'], colors['end']) main_env['ARCOMSTR'] = u'{}[Ar]{} $TARGET'.format(colors['purple'], colors['end']) main_env['RANLIBCOMSTR'] = \ u'{}[Randlib]{} $TARGET'.format(colors['purple'], colors['end']) main_env['PRINT_CMD_LINE_FUNC'] = pretty_cmd_print main_env['INSTALLSTR'] = \ u'{}[Installing] {}$SOURCE to $TARGET'.format(colors['blue'], colors['end']) # Include paths main_env.AppendUnique(CPPPATH=['#/src', '#/src/core', - '#/src/mpi', '#/src/bem', '#/src/surface', - '#/src/python', '#/src/percolation', 
'#/src/model', '#/src/model/elasto_plastic', '#/src/solvers', '#/src/gpu', '#/python']) # Changing the shared object extension main_env['SHOBJSUFFIX'] = '.o' +# Variables for clarity +main_env['use_cuda'] = main_env['backend'] == "cuda" +main_env['use_fftw'] = not main_env['use_cuda'] + # Back to gcc if cuda is activated if main_env['backend'] == "cuda" and "g++" not in main_env['CXX']: raise StopError('GCC should be used when compiling with CUDA') # OpenMP flags - compiler dependent omp_flags = { "g++": ["-fopenmp"], "clang++": ["-fopenmp"], "icpc": ["-qopenmp"] } def cxx_alias(cxx): for k in omp_flags.keys(): if k in cxx: return k raise StopError('Unsupported compiler: ' + cxx) cxx = cxx_alias(main_env['CXX']) # Setting main compilation flags main_env['CXXFLAGS'] = Split(main_env['CXXFLAGS']) main_env['LINKFLAGS'] = main_env['CXXFLAGS'] main_env.AppendUnique( CXXFLAGS=Split('-std=c++14 -Wall -Wextra'), CPPDEFINES={ 'TAMAAS_LOOP_BACKEND': 'TAMAAS_LOOP_BACKEND_${backend.upper()}', 'TAMAAS_FFTW_BACKEND': 'TAMAAS_FFTW_BACKEND_${fftw_threads.upper()}' }, ) if main_env['backend'] != 'cuda': main_env.AppendUnique(CXXFLAGS=['-pedantic']) # Adding OpenMP flags if main_env['backend'] == 'omp': main_env.AppendUnique(CXXFLAGS=omp_flags[cxx]) main_env.AppendUnique(LINKFLAGS=omp_flags[cxx]) else: main_env.AppendUnique(CXXFLAGS=['-Wno-unknown-pragmas']) # Correct bug in clang? 
if main_env['backend'] == 'omp' and cxx == "clang++": main_env.AppendUnique(LIBS=["atomic"]) elif main_env['backend'] == 'tbb': main_env.AppendUnique(LIBS=['tbb']) # Manage MPI compiler if main_env['use_mpi']: main_env['CXX'] = '$MPICXX' main_env.AppendUnique(CPPDEFINES=['TAMAAS_USE_MPI']) main_env.AppendUnique(CXXFLAGS=['-Wno-cast-function-type']) # Flags and options if main_env['build_type'] == 'debug': main_env.AppendUnique(CPPDEFINES=['TAMAAS_DEBUG']) # Define the scalar types main_env.AppendUnique(CPPDEFINES={'TAMAAS_REAL_TYPE': '${real_type}', 'TAMAAS_INT_TYPE': '${integer_type}'}) # Compilation flags cxxflags_dict = { "debug": Split("-g -O0"), "profiling": Split("-g -O3 -fno-omit-frame-pointer"), "release": Split("-O3") } if main_env['sanitizer'] != 'none': if main_env['backend'] == 'cuda': raise StopError( "Sanitizers with cuda are not yet supported!") cxxflags_dict[build_type].append('-fsanitize=${sanitizer}') main_env.AppendUnique(CXXFLAGS=cxxflags_dict[build_type]) main_env.AppendUnique(SHLINKFLAGS=cxxflags_dict[build_type]) main_env.AppendUnique(LINKFLAGS=cxxflags_dict[build_type]) if main_env['should_configure']: basic_checks(main_env) detect_dependencies(main_env) # Writing information file main_env.Tool('textfile') main_env['SUBST_DICT'] = get_git_subst() # Empty values if requested if main_env['strip_info']: for k in main_env['SUBST_DICT']: main_env['SUBST_DICT'][k] = "" # Substitution of environment file main_env['SUBST_DICT'].update({ '@build_type@': '$build_type', '@build_dir@': '${build_dir.abspath}', '@build_version@': '$version', '@backend@': '$backend', }) # Environment file content env_content = """export PYTHONPATH=@build_dir@/python:$$PYTHONPATH export LD_LIBRARY_PATH=@build_dir@/src:$$LD_LIBRARY_PATH """ # Writing environment file env_file = main_env.Textfile( main_env.File('tamaas_environment.sh', main_env['build_dir']), env_content) # Default targets build_targets = ['build-cpp', env_file] install_targets = ['install-lib'] if 
main_env._get_major_minor_revision(scons_version)[0] >= 4: main_env.Tool('compilation_db') main_env.CompilationDatabase(PRINT_CMD_LINE_FUNC=pretty_cmd_print) # Building Tamaas library Export('main_env') main_env.SubDirectory('src') # Building Tamaas extra components for dir in ['python', 'tests']: if main_env['build_{}'.format(dir)] and not main_env.GetOption('help'): main_env.SubDirectory(dir) build_targets.append('build-{}'.format(dir)) # Building API + Sphinx documentation if requested if main_env['build_doc']: main_env.SubDirectory('doc') main_env.Alias('doc', 'build-doc') install_targets.append('install-doc') else: dummy_command(main_env, 'doc', 'Command "doc" does not do anything' ' without documentation activated ("build_doc=True")') # Define dummy dev command when python is deactivated if not main_env['build_python']: dummy_command(main_env, 'dev', 'Command "dev" does not do anything' + ' without python activated ("build_python=True")') else: install_targets.append('install-python') # Define dummy test command when tests are deactivated if not main_env['build_tests']: dummy_command(main_env, 'test', 'Command "test" does not do anything' + ' without tests activated ("build_tests=True")') # Definition of target aliases, a.k.a. 
sub-commands main_env.Alias('build', build_targets) # Define proper install targets main_env.Alias('install', install_targets) # Default target is to build stuff main_env.Default('build') # Building a tar archive archive = main_env.Command( 'tamaas-${version}.tar.gz', '', ('git archive ' '--format=tar.gz ' '--prefix=tamaas/ ' '-o $TARGET HEAD'), ) main_env.Alias('archive', archive) diff --git a/src/SConscript b/src/SConscript index 222cb32..8fc8240 100644 --- a/src/SConscript +++ b/src/SConscript @@ -1,158 +1,146 @@ # -*- mode:python; coding: utf-8 -*- # vim: set ft=python: # @file # LICENSE # # Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), # Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published # by the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
from SCons.Script import Import, Export def prepend(env, path, list): return [env.File(x, path) for x in list] Import('main_env') env = main_env.Clone() env.AddMethod(prepend, 'PrependDir') # Core core_list = """ fft_engine.cpp -fftw_engine.cpp grid.cpp grid_hermitian.cpp statistics.cpp tamaas.cpp loop.cpp computes.cpp logger.cpp mpi_interface.cpp """.split() core_list = env.PrependDir('core', core_list) +if env['use_fftw']: + core_list += ['core/fftw/fftw_engine.cpp'] + + if env['use_mpi']: + core_list += ['core/fftw/mpi/fftw_mpi_engine.cpp'] + +if env['use_cuda']: + core_list += ['core/cuda/cufft_engine.cpp'] + info_file = env.Substfile('tamaas_info.cpp', 'tamaas_info.cpp.in') core_list.append(info_file) # Lib roughcontact generator_list = """ surface_generator.cpp surface_generator_filter.cpp surface_generator_random_phase.cpp isopowerlaw.cpp regularized_powerlaw.cpp """.split() generator_list = env.PrependDir('surface', generator_list) # Lib PERCOLATION percolation_list = """ flood_fill.cpp """.split() percolation_list = env.PrependDir('percolation', percolation_list) # Model model_list = """ model.cpp model_factory.cpp model_type.cpp model_template.cpp integral_operator.cpp be_engine.cpp westergaard.cpp elastic_functional.cpp meta_functional.cpp adhesion_functional.cpp volume_potential.cpp kelvin.cpp mindlin.cpp boussinesq.cpp hooke.cpp elasto_plastic/isotropic_hardening.cpp elasto_plastic/residual.cpp integration/element.cpp """.split() model_list = env.PrependDir('model', model_list) # Solvers solvers_list = """ contact_solver.cpp polonsky_keer_rey.cpp kato_saturated.cpp kato.cpp beck_teboulle.cpp condat.cpp polonsky_keer_tan.cpp ep_solver.cpp dfsane_solver.cpp epic.cpp """.split() solvers_list = env.PrependDir('solvers', solvers_list) -# GPU API -gpu_list = """ -cufft_engine.cpp -""".split() -gpu_list = env.PrependDir('gpu', gpu_list) - -# MPI API -mpi_list = """ -fftw_mpi_engine.cpp -""".split() -mpi_list = env.PrependDir('mpi', mpi_list) - # Assembling 
total list rough_contact_list = \ core_list + generator_list + percolation_list + model_list + solvers_list -# Adding GPU if needed -if env['backend'] == 'cuda': - rough_contact_list += gpu_list - -# Adding MPI if needed -if env['use_mpi']: - rough_contact_list += mpi_list - # Adding extra warnings for Tamaas base lib env.AppendUnique(CXXFLAGS=['-Wextra']) # Allowing libTamaas.so to find libs in the same directory env.AppendUnique(RPATH=["'$$$$ORIGIN'"]) # Build static library for packaging if env['build_static_lib']: env.AppendUnique(CXXFLAGS='-fPIC') libTamaas = env.StaticLibrary('Tamaas', rough_contact_list) # Build shared library (default) else: libTamaas = env.SharedLibrary('Tamaas', rough_contact_list, SHLIBVERSION=env['version']) # Specify install target to install lib lib_prefix = env.Dir('lib', env['prefix']) lib_install = env.InstallVersionedLib(target=lib_prefix, source=libTamaas) # Defining alias targets main_env.Alias('build-cpp', libTamaas) main_env.Alias('install-lib', lib_install) # Export target for use in python builds Export('libTamaas') diff --git a/src/gpu/cufft_engine.cpp b/src/core/cuda/cufft_engine.cpp similarity index 100% rename from src/gpu/cufft_engine.cpp rename to src/core/cuda/cufft_engine.cpp diff --git a/src/gpu/cufft_engine.hh b/src/core/cuda/cufft_engine.hh similarity index 100% rename from src/gpu/cufft_engine.hh rename to src/core/cuda/cufft_engine.hh diff --git a/src/gpu/unified_allocator.hh b/src/core/cuda/unified_allocator.hh similarity index 100% rename from src/gpu/unified_allocator.hh rename to src/core/cuda/unified_allocator.hh diff --git a/src/core/fft_engine.cpp b/src/core/fft_engine.cpp index d37c81a..daece46 100644 --- a/src/core/fft_engine.cpp +++ b/src/core/fft_engine.cpp @@ -1,60 +1,59 @@ /** * @file * LICENSE * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) * * This program is free software: you can redistribute it 
and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * */ /* -------------------------------------------------------------------------- */ #include "fft_engine.hh" #if TAMAAS_USE_CUDA -#include "cufft_engine.hh" +#include "cuda/cufft_engine.hh" #else -#include "fftw_engine.hh" -#endif - +#include "fftw/fftw_engine.hh" #ifdef TAMAAS_USE_MPI -#include "fftw_mpi_engine.hh" +#include "fftw/mpi/fftw_mpi_engine.hh" #include "mpi_interface.hh" #endif +#endif /* NOTE(review): the FFTW and MPI engine headers are now included only when TAMAAS_USE_CUDA is false, yet makeEngine() below tests TAMAAS_USE_MPI before TAMAAS_USE_CUDA; a build defining both would name FFTWMPIEngine and FFTWEngine without this file including their headers. Confirm that combination is rejected by the build, or test TAMAAS_USE_CUDA first. */ /* -------------------------------------------------------------------------- */ namespace tamaas { std::unique_ptr FFTEngine::makeEngine(unsigned int flags) { #define inst(x) \ do { \ Logger().get(LogLevel::debug) << TAMAAS_DEBUG_MSG("[" #x "] Init"); \ return std::make_unique(flags); \ } while (0) #ifdef TAMAAS_USE_MPI if (mpi::size() != 1) inst(FFTWMPIEngine); else inst(FFTWEngine); #elif TAMAAS_USE_CUDA inst(CuFFTEngine); #else inst(FFTWEngine); #endif #undef inst } } // namespace tamaas diff --git a/src/core/fftw_allocator.hh b/src/core/fftw/fftw_allocator.hh similarity index 100% copy from src/core/fftw_allocator.hh copy to src/core/fftw/fftw_allocator.hh diff --git a/src/core/fftw_engine.cpp b/src/core/fftw/fftw_engine.cpp similarity index 100% rename from src/core/fftw_engine.cpp rename to src/core/fftw/fftw_engine.cpp diff --git a/src/core/fftw_engine.hh b/src/core/fftw/fftw_engine.hh similarity index 97% rename from src/core/fftw_engine.hh rename to src/core/fftw/fftw_engine.hh index 873fedf..3dfe0f8
100644 --- a/src/core/fftw_engine.hh +++ b/src/core/fftw/fftw_engine.hh @@ -1,105 +1,105 @@ /** * @file * LICENSE * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * */ /* -------------------------------------------------------------------------- */ -#ifndef FFTW_ENGINE_H -#define FFTW_ENGINE_H +#ifndef FFTW_ENGINE_HH +#define FFTW_ENGINE_HH /* -------------------------------------------------------------------------- */ #include "fft_engine.hh" -#include "fftw_interface.hh" +#include "fftw/interface.hh" /* -------------------------------------------------------------------------- */ namespace tamaas { class FFTWEngine : public FFTEngine { -private: +protected: using plan_t = std::pair, fftw::plan>; using complex_t = fftw::helper::complex; /// Perform forward (R2C) transform template void forwardImpl(const Grid& real, GridHermitian& spectral); /// Perform backward (C2R) transform template void backwardImpl(Grid& real, const GridHermitian& spectral); /// Return the plans pair for a given transform signature plan_t& getPlans(key_t key); public: /// Initialize with flags explicit FFTWEngine(unsigned int flags = FFTW_ESTIMATE) noexcept : _flags(flags), plans() {} void forward(const Grid& real, GridHermitian& spectral) override { forwardImpl(real, spectral); 
} void forward(const Grid& real, GridHermitian& spectral) override { forwardImpl(real, spectral); } void backward(Grid& real, GridHermitian& spectral) override { backwardImpl(real, spectral); } void backward(Grid& real, GridHermitian& spectral) override { backwardImpl(real, spectral); } unsigned int flags() const { return _flags; } /// Cast to FFTW complex type static auto cast(Complex* data) { return reinterpret_cast(data); } static auto cast(const Complex* data) { return const_cast(reinterpret_cast(data)); } protected: unsigned int _flags; ///< FFTW flags std::map plans; ///< plans corresponding to signatures }; /* -------------------------------------------------------------------------- */ template void FFTWEngine::forwardImpl(const Grid& real, GridHermitian& spectral) { auto& plans = getPlans(make_key(real, spectral)); fftw::execute(plans.first, const_cast(real.getInternalData()), cast(spectral.getInternalData())); } template void FFTWEngine::backwardImpl(Grid& real, const GridHermitian& spectral) { auto& plans = getPlans(make_key(real, spectral)); fftw::execute(plans.second, cast(spectral.getInternalData()), real.getInternalData()); // Normalize real *= (1. / real.getNbPoints()); } } // namespace tamaas #endif diff --git a/src/core/fftw_allocator.hh b/src/core/fftw/interface.hh similarity index 59% copy from src/core/fftw_allocator.hh copy to src/core/fftw/interface.hh index c8b5932..03a9032 100644 --- a/src/core/fftw_allocator.hh +++ b/src/core/fftw/interface.hh @@ -1,52 +1,40 @@ /** * @file * LICENSE * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * */ /* -------------------------------------------------------------------------- */ -#ifndef FFTW_ALLOCATOR_HH -#define FFTW_ALLOCATOR_HH -/* -------------------------------------------------------------------------- */ -#include -#include +#ifndef FFTW_INTERFACE_HH +#define FFTW_INTERFACE_HH /* Aliases namespace fftw to fftw_impl when TAMAAS_USE_FFTW is defined, and to fftw_dummy otherwise. */ /* -------------------------------------------------------------------------- */ -namespace tamaas { +#ifdef TAMAAS_USE_FFTW +#include "interface_impl.hh" +#include "mpi/interface.hh" + +namespace fftw = fftw_impl; -/// Class allocating -/// [SIMD](http://www.fftw.org/fftw3_doc/SIMD-alignment-and-fftw_005fmalloc.html#SIMD-alignment-and-fftw_005fmalloc) -/// aligned memory -template -struct FFTWAllocator { - /// Allocate memory - static T* allocate(std::size_t n) noexcept { - T* p = nullptr; - p = (T*)fftw_malloc(sizeof(T) * n); - return p; - } +#else +#include "interface_dummy.hh" - /// Free memory - static void deallocate(T* p, __attribute__((unused)) std::size_t n) noexcept { - fftw_free(p); - } -}; +namespace fftw = fftw_dummy; -} // namespace tamaas +#endif // TAMAAS_USE_FFTW -#endif // FFTW_ALLOCATOR_HH +#endif // FFTW_INTERFACE_HH diff --git a/src/core/fftw_allocator.hh b/src/core/fftw/interface_dummy.hh similarity index 59% rename from src/core/fftw_allocator.hh rename to src/core/fftw/interface_dummy.hh index c8b5932..a5941bd 100644 --- a/src/core/fftw_allocator.hh +++ b/src/core/fftw/interface_dummy.hh @@ -1,52 +1,27 @@ /** * @file * LICENSE * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) * * This program is
free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * */ /* -------------------------------------------------------------------------- */ -#ifndef FFTW_ALLOCATOR_HH -#define FFTW_ALLOCATOR_HH +#ifndef FFTW_INTERFACE_DUMMY_HH +#define FFTW_INTERFACE_DUMMY_HH /* NOTE(review): as added by this patch the header declares nothing inside its include guard, while fftw/interface.hh aliases namespace fftw to fftw_dummy when TAMAAS_USE_FFTW is undefined; that alias needs a visible fftw_dummy namespace declaration. Confirm one is provided before relying on the non-FFTW path. */ /* -------------------------------------------------------------------------- */ -#include -#include -/* -------------------------------------------------------------------------- */ - -namespace tamaas { - -/// Class allocating -/// [SIMD](http://www.fftw.org/fftw3_doc/SIMD-alignment-and-fftw_005fmalloc.html#SIMD-alignment-and-fftw_005fmalloc) -/// aligned memory -template -struct FFTWAllocator { - /// Allocate memory - static T* allocate(std::size_t n) noexcept { - T* p = nullptr; - p = (T*)fftw_malloc(sizeof(T) * n); - return p; - } - - /// Free memory - static void deallocate(T* p, __attribute__((unused)) std::size_t n) noexcept { - fftw_free(p); - } -}; - -} // namespace tamaas -#endif // FFTW_ALLOCATOR_HH +#endif // FFTW_INTERFACE_DUMMY_HH diff --git a/src/core/fftw_interface.hh b/src/core/fftw/interface_impl.hh similarity index 97% rename from src/core/fftw_interface.hh rename to src/core/fftw/interface_impl.hh index 2d6d71d..bcc3556 100644 --- a/src/core/fftw_interface.hh +++ b/src/core/fftw/interface_impl.hh @@ -1,209 +1,206 @@ /** * @file * LICENSE * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de
Simulation en Mécanique des Solides) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * */ /* -------------------------------------------------------------------------- */ -#ifndef FFTW_INTERFACE -#define FFTW_INTERFACE +#ifndef FFTW_INTERFACE_IMPL_HH +#define FFTW_INTERFACE_IMPL_HH /* -------------------------------------------------------------------------- */ #include +#include #include #include #include -#include -#ifdef TAMAAS_USE_MPI -#include -#endif - -namespace fftw { +namespace fftw_impl { template struct helper; template <> struct helper { using complex = fftw_complex; using plan = fftw_plan; static auto alloc_real(std::size_t size) { return fftw_alloc_real(size); } static auto alloc_complex(std::size_t size) { return fftw_alloc_complex(size); } }; template <> struct helper { using complex = fftwl_complex; using plan = fftwl_plan; static auto alloc_real(std::size_t size) { return fftwl_alloc_real(size); } static auto alloc_complex(std::size_t size) { return fftwl_alloc_complex(size); } }; template inline auto free(T* ptr) { fftw_free(ptr); } inline auto destroy(fftw_plan plan) { fftw_destroy_plan(plan); } inline auto destroy(fftwl_plan plan) { fftwl_destroy_plan(plan); } inline auto init_threads() { return fftw_init_threads(); } inline auto plan_with_nthreads(int nthreads) { return fftw_plan_with_nthreads(nthreads); } inline auto cleanup_threads() { return fftw_cleanup_threads(); } /// 
Holder type for fftw plans template struct plan { typename helper::plan _plan; /// Create from plan explicit plan(typename helper::plan _plan = nullptr) : _plan(_plan) {} /// Move constructor to avoid accidental plan destruction plan(plan&& o) : _plan(std::exchange(o._plan, nullptr)) {} /// Move operator plan& operator=(plan&& o) { _plan = std::exchange(o._plan, nullptr); return *this; } /// Destroy plan ~plan() noexcept { if (_plan) - fftw::destroy(_plan); + fftw_impl::destroy(_plan); } /// For seamless use with fftw api operator typename helper::plan() const { return _plan; } }; /// RAII helper for fftw_free template struct ptr { T* _ptr; ~ptr() noexcept { if (_ptr) - fftw::free(_ptr); + fftw_impl::free(_ptr); } operator T*() { return _ptr; } }; /* -------------------------------------------------------------------------- */ inline auto plan_many_forward(int rank, const int* n, int howmany, double* in, const int* inembed, int istride, int idist, fftw_complex* out, const int* onembed, int ostride, int odist, unsigned flags) { return fftw_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist, flags); } inline auto plan_many_backward(int rank, const int* n, int howmany, fftw_complex* in, const int* inembed, int istride, int idist, double* out, const int* onembed, int ostride, int odist, unsigned flags) { return fftw_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist, flags); } inline auto plan_1d_forward(int n, double* in, fftw_complex* out, unsigned flags) { return fftw_plan_dft_r2c_1d(n, in, out, flags); } inline auto plan_1d_backward(int n, fftw_complex* in, double* out, unsigned flags) { return fftw_plan_dft_c2r_1d(n, in, out, flags); } inline auto plan_2d_forward(int n0, int n1, double* in, fftw_complex* out, unsigned flags) { return fftw_plan_dft_r2c_2d(n0, n1, in, out, flags); } inline auto plan_2d_backward(int n0, int n1, fftw_complex* out, double* in, unsigned flags) { return 
fftw_plan_dft_c2r_2d(n0, n1, out, in, flags); } inline auto execute(fftw_plan plan) { fftw_execute(plan); } inline auto execute(fftw_plan plan, double* in, fftw_complex* out) { fftw_execute_dft_r2c(plan, in, out); } inline auto execute(fftw_plan plan, fftw_complex* in, double* out) { fftw_execute_dft_c2r(plan, in, out); } /* -------------------------------------------------------------------------- */ inline auto plan_many_forward(int rank, const int* n, int howmany, long double* in, const int* inembed, int istride, int idist, fftwl_complex* out, const int* onembed, int ostride, int odist, unsigned flags) { return fftwl_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist, flags); } inline auto plan_many_backward(int rank, const int* n, int howmany, fftwl_complex* in, const int* inembed, int istride, int idist, long double* out, const int* onembed, int ostride, int odist, unsigned flags) { return fftwl_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist, flags); } inline auto plan_1d_forward(int n, long double* in, fftwl_complex* out, unsigned flags) { return fftwl_plan_dft_r2c_1d(n, in, out, flags); } inline auto plan_1d_backward(int n, fftwl_complex* in, long double* out, unsigned flags) { return fftwl_plan_dft_c2r_1d(n, in, out, flags); } inline auto plan_2d_forward(int n0, int n1, long double* in, fftwl_complex* out, unsigned flags) { return fftwl_plan_dft_r2c_2d(n0, n1, in, out, flags); } inline auto plan_2d_backward(int n0, int n1, fftwl_complex* out, long double* in, unsigned flags) { return fftwl_plan_dft_c2r_2d(n0, n1, out, in, flags); } inline auto execute(fftwl_plan plan) { fftwl_execute(plan); } inline auto execute(fftwl_plan plan, long double* in, fftwl_complex* out) { fftwl_execute_dft_r2c(plan, in, out); } inline auto execute(fftwl_plan plan, fftwl_complex* in, long double* out) { fftwl_execute_dft_c2r(plan, in, out); } -} // namespace fftw + +} // namespace fftw_impl 
#endif // FFTW_INTERFACE diff --git a/src/mpi/fftw_mpi_engine.cpp b/src/core/fftw/mpi/fftw_mpi_engine.cpp similarity index 98% rename from src/mpi/fftw_mpi_engine.cpp rename to src/core/fftw/mpi/fftw_mpi_engine.cpp index be8bc09..1d6df72 100644 --- a/src/mpi/fftw_mpi_engine.cpp +++ b/src/core/fftw/mpi/fftw_mpi_engine.cpp @@ -1,135 +1,135 @@ /** * @file * LICENSE * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
* */ /* -------------------------------------------------------------------------- */ -#include "fftw_mpi_engine.hh" -#include "fftw_mpi_interface.hh" +#include "fftw/mpi/fftw_mpi_engine.hh" +#include "fftw/interface.hh" #include "logger.hh" #include "mpi_interface.hh" #include "partitioner.hh" #include /* -------------------------------------------------------------------------- */ namespace tamaas { auto FFTWMPIEngine::local_size(const key_t& key) { const auto n = [&key]() { std::array sizes; std::copy_n(key.cbegin(), 2, sizes.begin()); sizes[1] = sizes[1] / 2 + 1; // hermitian size return sizes; }(); const auto sizes = Partitioner<2>::local_size(n); return std::make_pair(Partitioner<2>::alloc_size(n, key[2]), std::get<0>(sizes)); } void FFTWMPIEngine::forward(const Grid& real, GridHermitian& spectral) { const auto key = make_key(real, spectral); auto& plans = getPlans(key); auto& workspace = workspaces[key]; // Ensure enough space for MPI overhead const auto sizes = local_size(key); spectral.reserve(sizes.first); // Can't use iterators here because of size mistmatch in workspace #pragma omp parallel for collapse(3) for (UInt i = 0; i < real.sizes()[0]; ++i) for (UInt j = 0; j < real.sizes()[1]; ++j) for (UInt k = 0; k < real.getNbComponents(); ++k) workspace(i, j, k) = real(i, j, k); fftw_mpi_execute_dft_r2c(plans.first, workspace.getInternalData(), cast(spectral.getInternalData())); } void FFTWMPIEngine::backward(Grid& real, GridHermitian& spectral) { auto key = make_key(real, spectral); auto& plans = getPlans(key); auto& workspace = workspaces[key]; // Ensure enough space for MPI overhead const auto sizes = local_size(key); spectral.reserve(sizes.first); fftw_mpi_execute_dft_c2r(plans.second, cast(spectral.getInternalData()), workspace.getInternalData()); // Can't use iterators here because of size mistmatch in worspace #pragma omp parallel for collapse(3) for (UInt i = 0; i < real.sizes()[0]; ++i) for (UInt j = 0; j < real.sizes()[1]; ++j) for (UInt k = 0; k 
< real.getNbComponents(); ++k) real(i, j, k) = workspace(i, j, k); real *= (1. / real.getGlobalNbPoints()); } FFTWMPIEngine::key_t FFTWMPIEngine::make_key(const Grid& real, const GridHermitian& spectral) { auto key{FFTEngine::make_key(real, spectral)}; // Reduce first dimension for total size key.front() = mpi::allreduce(key.front()); return key; } FFTWMPIEngine::plan_t& FFTWMPIEngine::getPlans(key_t key) { if (plans.find(key) != plans.end()) return plans[key]; const int rank = key.size() - 3; // dimension of fft const int howmany = key[rank]; std::vector n(rank); // size of individual fft std::copy_n(key.begin(), rank, n.begin()); fftw::ptr in{nullptr}; fftw::ptr out{nullptr}; const auto sizes = local_size(key); workspaces.emplace(key, Grid({static_cast(sizes.second), 2 * (key[1] / 2 + 1)}, howmany)); auto& workspace = workspaces[key]; workspace.reserve(2 * sizes.first); out._ptr = fftw::helper::alloc_complex(sizes.first); fftw::plan forward{fftw_mpi_plan_many_dft_r2c( rank, n.data(), howmany, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, workspace.getInternalData(), out, mpi::comm::world(), flags())}; fftw::plan backward{fftw_mpi_plan_many_dft_c2r( rank, n.data(), howmany, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, out, workspace.getInternalData(), mpi::comm::world(), flags())}; plans[key] = std::make_pair(std::move(forward), std::move(backward)); return plans[key]; } } // namespace tamaas diff --git a/src/mpi/fftw_mpi_engine.hh b/src/core/fftw/mpi/fftw_mpi_engine.hh similarity index 97% rename from src/mpi/fftw_mpi_engine.hh rename to src/core/fftw/mpi/fftw_mpi_engine.hh index d69766c..f2b44f6 100644 --- a/src/mpi/fftw_mpi_engine.hh +++ b/src/core/fftw/mpi/fftw_mpi_engine.hh @@ -1,69 +1,69 @@ /** * @file * LICENSE * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) * * This program is free software: you can redistribute it and/or modify * it under the terms 
of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * */ /* -------------------------------------------------------------------------- */ #ifndef FFTW_MPI_ENGINE_HH #define FFTW_MPI_ENGINE_HH /* -------------------------------------------------------------------------- */ -#include "fftw_engine.hh" -#include "fftw_interface.hh" +#include "fftw/fftw_engine.hh" +#include "fftw/interface.hh" #include "grid.hh" #include "grid_hermitian.hh" #include /* -------------------------------------------------------------------------- */ namespace tamaas { class FFTWMPIEngine : public FFTWEngine { public: using FFTWEngine::FFTWEngine; void forward(const Grid& /*real*/, GridHermitian& /*spectral*/) override { TAMAAS_EXCEPTION("FFTW/MPI does not support 1D transforms"); } void backward(Grid& /*real*/, GridHermitian& /*spectral*/) override { TAMAAS_EXCEPTION("FFTW/MPI does not support 1D transforms"); } /// FFTW/MPI forward (r2c) transform void forward(const Grid& real, GridHermitian& spectral) override; /// FFTW/MPI backward (c2r) transform void backward(Grid& real, GridHermitian& spectral) override; protected: /// Make a transform signature from a pair of grids static key_t make_key(const Grid& real, const GridHermitian& spectral); /// Return the plans pair for a given transform signature plan_t& getPlans(key_t key); /// Get FFTW local sizes from an hermitian grid static auto local_size(const key_t& key); protected: /// Buffer for real data because of FFTW/MPI layout std::map> workspaces; }; } // namespace 
tamaas #endif // FFT_MPI_ENGINE_HH diff --git a/src/mpi/fftw_mpi_interface.hh b/src/core/fftw/mpi/interface.hh similarity index 96% rename from src/mpi/fftw_mpi_interface.hh rename to src/core/fftw/mpi/interface.hh index b69c5db..3ebe5a8 100644 --- a/src/mpi/fftw_mpi_interface.hh +++ b/src/core/fftw/mpi/interface.hh @@ -1,69 +1,69 @@ /** * @file * LICENSE * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
* */ /* -------------------------------------------------------------------------- */ -#ifndef FFTW_MPI_INTERFACE -#define FFTW_MPI_INTERFACE +#ifndef FFTW_MPI_INTERFACE_HH +#define FFTW_MPI_INTERFACE_HH /* -------------------------------------------------------------------------- */ #include "mpi_interface.hh" #include #include #include #include #ifdef TAMAAS_USE_MPI #include #endif /* -------------------------------------------------------------------------- */ -namespace fftw { +namespace fftw_impl { namespace mpi_dummy { inline void init() {} inline void cleanup() {} inline auto local_size_many(int rank, const std::ptrdiff_t* size, std::ptrdiff_t howmany) { return std::make_tuple(howmany * std::accumulate(size, size + rank, std::ptrdiff_t{1}, std::multiplies()), size[0], 0); } } // namespace mpi_dummy #ifdef TAMAAS_USE_MPI namespace mpi_impl { inline void init() { fftw_mpi_init(); } inline void cleanup() { fftw_mpi_cleanup(); } inline auto local_size_many(int rank, const std::ptrdiff_t* size, std::ptrdiff_t howmany) { if (rank < 2) throw std::domain_error("FFTW-MPI cannot be used for 1D transforms"); std::ptrdiff_t local_n0, local_n0_offset; auto res = fftw_mpi_local_size_many( rank, size, howmany, FFTW_MPI_DEFAULT_BLOCK, ::tamaas::mpi::comm::world(), &local_n0, &local_n0_offset); return std::make_tuple(res, local_n0, local_n0_offset); } } // namespace mpi_impl namespace mpi = mpi_impl; #else namespace mpi = mpi_dummy; #endif } // namespace fftw #endif diff --git a/src/mpi/partitioner.hh b/src/core/partitioner.hh similarity index 99% rename from src/mpi/partitioner.hh rename to src/core/partitioner.hh index 2880474..a523097 100644 --- a/src/mpi/partitioner.hh +++ b/src/core/partitioner.hh @@ -1,118 +1,118 @@ /** * @file * LICENSE * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) * * This program is free software: you can redistribute it and/or modify * it under 
the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * */ /* -------------------------------------------------------------------------- */ #ifndef PARTITIONER_HH #define PARTITIONER_HH /* -------------------------------------------------------------------------- */ -#include "fftw_mpi_interface.hh" +#include "fftw/interface.hh" #include "grid.hh" #include "mpi_interface.hh" #include "tamaas.hh" #include #include #include #include /* -------------------------------------------------------------------------- */ namespace tamaas { template struct Partitioner { template static decltype(auto) global_size(Container local) { local.front() = mpi::allreduce(local.front()); return local; } template static decltype(auto) global_size(const Grid& grid) { return global_size(grid.sizes()); } template static decltype(auto) local_size(Container global) { if (dim == 1) return global; auto tup = fftw::mpi::local_size_many(dim, cast_size(global).data(), 1); global.front() = static_cast(std::get<1>(tup)); return global; } template static decltype(auto) local_size(const Grid& grid) { return local_size(grid.sizes()); } static decltype(auto) local_size(std::initializer_list list) { std::array global; std::copy_n(list.begin(), dim, global.begin()); return local_size(global); } template static decltype(auto) local_offset(const Container& global) { if (dim == 1) return std::size_t{0}; auto tup = fftw::mpi::local_size_many(dim, cast_size(global).data(), 1); return static_cast(std::get<2>(tup)); } template static 
decltype(auto) local_offset(const Grid& grid) { auto offset = local_offset(global_size(grid.sizes())); return offset * grid.getStrides().front(); } static decltype(auto) local_offset(std::initializer_list list) { std::array global; std::copy_n(list.begin(), dim, global.begin()); return local_offset(global); } template static decltype(auto) cast_size(const Container& s) { std::array n; std::copy_n(s.cbegin(), dim, n.begin()); return n; } static decltype(auto) alloc_size(const std::array& global, UInt howmany) { if (dim == 1) return std::accumulate(global.begin(), global.end(), std::size_t{1}, std::multiplies()); auto tup = fftw::mpi::local_size_many(dim, cast_size(global).data(), howmany); return static_cast(std::get<0>(tup)); } template static decltype(auto) gather(const Grid& send) { Grid result(global_size(send), send.getNbComponents()); mpi::gather(send.getInternalData(), result.getInternalData(), send.dataSize()); return result; } }; } // namespace tamaas #endif diff --git a/src/core/statistics.cpp b/src/core/statistics.cpp index 10d8b5a..acbc22b 100644 --- a/src/core/statistics.cpp +++ b/src/core/statistics.cpp @@ -1,214 +1,214 @@ /** * @file * LICENSE * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
* */ /* -------------------------------------------------------------------------- */ #include "statistics.hh" -#include "fftw_engine.hh" +#include "fft_engine.hh" #include "loop.hh" #include "static_types.hh" /* -------------------------------------------------------------------------- */ namespace tamaas { template Real Statistics::computeRMSHeights(Grid& surface) { return std::sqrt(surface.var()); } template Real Statistics::computeSpectralRMSSlope(Grid& surface) { const auto h_size = GridHermitian::hermitianDimensions(surface.sizes()); auto wavevectors = FFTEngine::template computeFrequencies(h_size); wavevectors *= 2 * M_PI; // need q for slopes const auto psd = computePowerSpectrum(surface); const Real rms_slope_mean = Loop::reduce( [] CUDA_LAMBDA(VectorProxy q, const Complex& psd_val) { // Checking if we're in the zone that does not have hermitian symmetry if (std::abs(q.back()) < 1e-15) return q.l2squared() * psd_val.real(); else return 2 * q.l2squared() * psd_val.real(); }, range>(wavevectors), psd); return std::sqrt(rms_slope_mean); } /* -------------------------------------------------------------------------- */ template GridHermitian Statistics::computePowerSpectrum(Grid& surface) { const auto h_size = GridHermitian::hermitianDimensions(surface.sizes()); GridHermitian psd(h_size, surface.getNbComponents()); FFTEngine::makeEngine()->forward(surface, psd); Real factor = 1. 
/ surface.getGlobalNbPoints(); // Squaring the fourier transform of surface and normalizing Loop::loop( [factor] CUDA_LAMBDA(Complex & c) { c *= factor; c *= conj(c); }, psd); return psd; } /* -------------------------------------------------------------------------- */ template Grid Statistics::computeAutocorrelation(Grid& surface) { Grid acf(surface.sizes(), surface.getNbComponents()); auto psd = computePowerSpectrum(surface); FFTEngine::makeEngine()->backward(acf, psd); acf *= acf.getGlobalNbPoints(); return acf; } /* -------------------------------------------------------------------------- */ template Real Statistics::contact(const Grid& tractions, UInt perimeter) { Real points = 0; UInt nc = tractions.getNbComponents(); switch (nc) { case 1: points = Loop::reduce( [] CUDA_LAMBDA(const Real& t) -> Real { return t > 0; }, tractions); break; case 2: points = Loop::reduce( [] CUDA_LAMBDA(VectorProxy t) -> Real { return t.back() > 0; }, range>(tractions)); break; case 3: points = Loop::reduce( [] CUDA_LAMBDA(VectorProxy t) -> Real { return t.back() > 0; }, range>(tractions)); break; default: TAMAAS_EXCEPTION("Invalid number of components in traction"); } auto area = points / tractions.getNbPoints(); if (dim == 1) perimeter = 0; // Correction from Yastrebov et al. (Trib. Intl., 2017) // 10.1016/j.triboint.2017.04.023 return area - (M_PI - 1 + std::log(2)) / (24. 
* tractions.getNbPoints()) * perimeter; } /* -------------------------------------------------------------------------- */ namespace { template class moment_helper { public: moment_helper(const std::array& exp) : exponent(exp) {} CUDA_LAMBDA Complex operator()(VectorProxy q, const Complex& phi) const { Real mul = 1; for (UInt i = 0; i < dim; ++i) mul *= std::pow(q(i), exponent[i]); // Do not duplicate everything from hermitian symmetry if (std::abs(q.back()) < 1e-15) return mul * phi; else return 2 * mul * phi; } private: std::array exponent; }; } // namespace template <> std::vector Statistics<1>::computeMoments(Grid& surface) { constexpr UInt dim = 1; std::vector moments(3); const auto psd = computePowerSpectrum(surface); auto wavevectors = FFTEngine::template computeFrequencies(psd.sizes()); // we don't multiply by 2 pi because moments are computed with k moments[0] = Loop::reduce(moment_helper{{{0}}}, range(wavevectors), psd) .real(); moments[1] = Loop::reduce(moment_helper{{{2}}}, range(wavevectors), psd) .real(); moments[2] = Loop::reduce(moment_helper{{{4}}}, range(wavevectors), psd) .real(); return moments; } template <> std::vector Statistics<2>::computeMoments(Grid& surface) { constexpr UInt dim = 2; std::vector moments(3); const auto psd = computePowerSpectrum(surface); auto wavevectors = FFTEngine::template computeFrequencies(psd.sizes()); // we don't multiply by 2 pi because moments are computed with k moments[0] = Loop::reduce(moment_helper{{{0, 0}}}, range(wavevectors), psd) .real(); auto m02 = Loop::reduce(moment_helper{{{0, 2}}}, range(wavevectors), psd) .real(); auto m20 = Loop::reduce(moment_helper{{{2, 0}}}, range(wavevectors), psd) .real(); moments[1] = 0.5 * (m02 + m20); auto m22 = Loop::reduce(moment_helper{{{2, 2}}}, range(wavevectors), psd) .real(); auto m40 = Loop::reduce(moment_helper{{{4, 0}}}, range(wavevectors), psd) .real(); auto m04 = Loop::reduce(moment_helper{{{0, 4}}}, range(wavevectors), psd) .real(); moments[2] = (3 * m22 + m40 
+ m04) / 3.; return moments; } template struct Statistics<1>; template struct Statistics<2>; } // namespace tamaas diff --git a/src/core/tamaas.cpp b/src/core/tamaas.cpp index a58552d..695e6b5 100644 --- a/src/core/tamaas.cpp +++ b/src/core/tamaas.cpp @@ -1,105 +1,104 @@ /** * @file * LICENSE * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
* */ /* -------------------------------------------------------------------------- */ #include "tamaas.hh" -#include "fftw_interface.hh" -#include "fftw_mpi_interface.hh" +#include "fftw/interface.hh" #include "logger.hh" #include "mpi_interface.hh" #if TAMAAS_LOOP_BACKEND == TAMAAS_LOOP_BACKEND_OMP #include #endif /* -------------------------------------------------------------------------- */ namespace tamaas { void initialize(UInt num_threads) { static bool has_warned = false; mpi::thread provided = mpi::thread::single; if (not mpi::initialized()) { mpi::init_thread(nullptr, nullptr, mpi::thread::multiple, &provided); } bool should_init_threads = (provided > mpi::thread::single); #if TAMAAS_LOOP_BACKEND == TAMAAS_LOOP_BACKEND_OMP if (num_threads) omp_set_num_threads(num_threads); // set user-defined number of threads else num_threads = omp_get_max_threads(); #else if (num_threads != 0) num_threads = 1; #endif #if TAMAAS_FFTW_BACKEND != TAMAAS_FFTW_BACKEND_NONE if (should_init_threads and (not fftw::init_threads())) { TAMAAS_EXCEPTION("FFTW could not initialize threads!"); } else if (not should_init_threads) Logger().get(LogLevel::debug) << "not initializing FFTW threads\n"; #endif if (mpi::initialized()) { if (not has_warned) { Logger().get(LogLevel::warning) << "experimental MPI support\n"; has_warned = true; } fftw::mpi::init(); } if (should_init_threads) { #if TAMAAS_FFTW_BACKEND != TAMAAS_FFTW_BACKEND_NONE Logger().get(LogLevel::debug) << "initializing FFTW with " << num_threads << " threads\n"; fftw::plan_with_nthreads(num_threads); #endif } } /* -------------------------------------------------------------------------- */ void finalize() { if (not mpi::finalized()) { #if TAMAAS_BACKEND != TAMAAS_BACKEND_CPP fftw::cleanup_threads(); #endif fftw::mpi::cleanup(); mpi::finalize(); } } namespace { /// Manager for initialize + finalize struct entry_exit_points { entry_exit_points() { initialize(); } ~entry_exit_points() { finalize(); } static const 
entry_exit_points singleton; }; const entry_exit_points entry_exit_points::singleton; } // namespace } // namespace tamaas diff --git a/src/core/tamaas.hh b/src/core/tamaas.hh index 46d2d4e..3b69cc7 100644 --- a/src/core/tamaas.hh +++ b/src/core/tamaas.hh @@ -1,190 +1,196 @@ /** * @mainpage Tamaas - A high-performance periodic contact library * * @section Introduction * Tamaas is a spectral-integral-equation based contact library. It is made * with love to be fast and friendly! * * @author Guillaume Anciaux * @author Lucas Frérot * @author Valentine Rey * @author Son Pham-Ba * * @section License * * Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne), * Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * */ /* -------------------------------------------------------------------------- */ #ifndef TAMAAS_HH #define TAMAAS_HH /* -------------------------------------------------------------------------- */ -#define TAMAAS_USE_FFTW ! 
defined(TAMAAS_USE_CUDA) + +#ifndef TAMAAS_USE_CUDA +#define TAMAAS_USE_FFTW +#endif + +// Values for fftw backends #define TAMAAS_FFTW_BACKEND_OMP 2 #define TAMAAS_FFTW_BACKEND_THREADS 2 #define TAMAAS_FFTW_BACKEND_NONE 3 +// Values for thrust backends #define TAMAAS_LOOP_BACKEND_OMP 1 #define TAMAAS_LOOP_BACKEND_TBB 2 #define TAMAAS_LOOP_BACKEND_CPP 3 #define TAMAAS_LOOP_BACKEND_CUDA 4 // Default loop backend is OpenMP #ifndef TAMAAS_LOOP_BACKEND #define TAMAAS_LOOP_BACKEND TAMAAS_LOOP_BACKEND_OMP #endif // Default FFTW backend is none #ifndef TAMAAS_FFTW_BACKEND #define TAMAAS_FFTW_BACKEND TAMAAS_FFTW_BACKEND_NONE #endif // If the thrust device hasn't been set, set OpenMP #ifndef THRUST_DEVICE_SYSTEM #define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_OMP #endif /* -------------------------------------------------------------------------- */ // Standard includes #include #include #include #include #include /* -------------------------------------------------------------------------- */ // Special thrust includes #include #include #ifdef TAMAAS_USE_CUDA -#include "unified_allocator.hh" +#include "cuda/unified_allocator.hh" #endif -#include "fftw_allocator.hh" +#include "fftw/fftw_allocator.hh" /* -------------------------------------------------------------------------- */ namespace tamaas { /* -------------------------------------------------------------------------- */ /// Cuda specific definitions #define CUDA_LAMBDA __device__ __host__ #ifdef TAMAAS_USE_CUDA template using Allocator = UnifiedAllocator; #else template using Allocator = FFTWAllocator; #endif /// Common types definitions // If type macros have not been set, put default values #ifndef TAMAAS_REAL_TYPE #define TAMAAS_REAL_TYPE double #endif #ifndef TAMAAS_INT_TYPE #define TAMAAS_INT_TYPE int #endif using Real = TAMAAS_REAL_TYPE; ///< default floating point type using Int = TAMAAS_INT_TYPE; ///< default signed integer type using UInt = std::make_unsigned_t; ///< default unsigned integer type template 
using complex = thrust::complex; ///< template complex wrapper using Complex = complex; ///< default floating point complex type /// Defining random toolbox using ::thrust::random::normal_distribution; using ::thrust::random::uniform_real_distribution; using random_engine = ::thrust::random::default_random_engine; namespace detail { template class Trait, typename Head, typename... Tail> struct fold_trait_tail_rec : std::integral_constant::value, Trait, Tail...>::value> {}; template class Trait, typename Head> struct fold_trait_tail_rec : std::integral_constant::value> {}; } // namespace detail template