diff --git a/SConstruct b/SConstruct
index 74268c2..f18588c 100644
--- a/SConstruct
+++ b/SConstruct
@@ -1,478 +1,480 @@
 # -*- mode:python; coding: utf-8 -*-
 # vim: set ft=python:
 
 # @file
 # LICENSE
 #
 # Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
 # Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published
 # by the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 # ------------------------------------------------------------------------------
 # Imports
 # ------------------------------------------------------------------------------
 from __future__ import print_function
 
 import sys
 import os
 from subprocess import check_output
 
 # Import below not strictly necessary, but good for pep8
 from SCons.Script import (
     EnsurePythonVersion,
     EnsureSConsVersion,
     Help,
     Environment,
     Variables,
     EnumVariable,
     PathVariable,
     BoolVariable,
     ListVariable,
     Split,
     Export,
     Dir,
 )
 
 from SCons.Errors import StopError
 from SCons import __version__ as scons_version
 
 from version import get_git_subst
 from detect import (
     FindFFTW,
     FindBoost,
     FindThrust,
     FindCuda,
     FindExpolit,
     FindPybind11
 )
 
 from INFOS import TAMAAS_INFOS
 
 # ------------------------------------------------------------------------------
 
 EnsurePythonVersion(2, 7)
 EnsureSConsVersion(2, 4)
 
 
 # ------------------------------------------------------------------------------
 def detect_dependencies(env):
     """Detect all third-party dependencies required by the build.
 
     Runs the ``Find*`` detection helpers on ``env``: FFTW (with the component
     selected by ``fftw_threads``, plus ``mpi`` when ``use_mpi`` is set), CUDA
     when the backend is ``cuda``, Boost, Expolit, Thrust and, when
     ``build_python`` is set, pybind11.
 
     Every option is read from ``env`` itself rather than from the global
     ``main_env``, so the function acts on whichever environment it is given.
     """
     fftw_comp = {
         'omp': ['omp'],
         'threads': ['threads'],
         'none': [],
     }
 
     # Copy the list so that appending 'mpi' never alters the lookup table
     fftw_components = list(fftw_comp[env['fftw_threads']])
     if env['use_mpi']:
         fftw_components.append('mpi')
 
     FindFFTW(env, fftw_components, precision=env['real_type'])
 
     if env['backend'] == 'cuda':
         FindCuda(env)
 
     FindBoost(env, ['boost/preprocessor/seq.hpp'])
     FindExpolit(env)
 
     # Thrust is searched through CUDA_ROOT when that variable is present in
     # the inherited shell environment, and through THRUST_ROOT otherwise
     thrust_var = 'CUDA_ROOT' if 'CUDA_ROOT' in env['ENV'] else 'THRUST_ROOT'
     FindThrust(env, env['backend'], thrust_var)
 
     if env['build_python']:
         FindPybind11(env)
 
 
 def subdir(env, dir):
     """Run the SConscript of a sub-directory.
 
     The script node is ``env.File('SConscript', dir)`` and the variant
     directory is ``env.Dir(dir, env['build_dir'])``; source duplication is
     enabled. Returns whatever ``env.SConscript`` returns.
     """
     script = env.File('SConscript', dir)
     variant = env.Dir(dir, env['build_dir'])
     return env.SConscript(script, variant_dir=variant, duplicate=True)
 
 def print_build_info(env):
     """Print a summary of the build configuration.
 
     The summary is a template expanded with ``env.subst``; the SCons and
     Python versions are formatted into it directly.
     """
     pieces = [
         "-- Tamaas ${version}\n",
         "-- SCons {} (Python {}.{})\n".format(scons_version,
                                               sys.version_info.major,
                                               sys.version_info.minor),
         "-- Build type: ${build_type}\n",
         "-- Thrust backend: ${backend}\n",
         "-- FFTW threads: ${fftw_threads}\n",
         "-- MPI: ${use_mpi}\n",
         "-- Build directory: ${build_dir}\n",
         "-- Python version (bindings): $py_version",
     ]
     template = "".join(pieces)
     print(env.subst(template))
 
 
 # ------------------------------------------------------------------------------
 # Main compilation
 # ------------------------------------------------------------------------------
 
 # Compilation colors
 colors = {
     'cyan': '\033[96m',
     'purple': '\033[95m',
     'blue': '\033[94m',
     'green': '\033[92m',
     'yellow': '\033[93m',
     'gray': '\033[38;5;8m',
     'orange': '\033[38;5;208m',
     'red': '\033[91m',
     'end': '\033[0m'
 }
 
 # Inherit all environment variables (for CXX detection, etc.)
 main_env = Environment(
     ENV=os.environ,
 )
 
 # Set tamaas information
 for k, v in TAMAAS_INFOS._asdict().items():
     main_env[k] = v
 
 main_env['COLOR_DICT'] = colors
 main_env.AddMethod(subdir, 'SubDirectory')
 
 # Build variables
 vars = Variables('build-setup.conf')
 vars.AddVariables(
     EnumVariable('build_type', 'Build type', 'release',
                  allowed_values=('release', 'profiling', 'debug'),
                  ignorecase=2),
     EnumVariable('backend', 'Thrust backend', 'omp',
                  allowed_values=('cpp', 'omp', 'tbb', 'cuda'),
                  ignorecase=2),
     EnumVariable('fftw_threads', 'Threads FFTW library preference', 'omp',
                  allowed_values=('omp', 'threads', 'none'),
                  ignorecase=2),
     EnumVariable('sanitizer', 'Sanitizer type', 'none',
                  allowed_values=('none', 'memory', 'leaks', 'address'),
                  ignorecase=2),
 
     PathVariable('prefix',
                  'Prefix where to install', '/usr/local'),
 
     # Dependencies paths
     PathVariable('FFTW_ROOT',
                  'FFTW custom path', os.getenv('FFTW_ROOT', ''),
                  PathVariable.PathAccept),
     PathVariable('THRUST_ROOT',
                  'Thrust custom path', os.getenv('THRUST_ROOT', ''),
                  PathVariable.PathAccept),
     PathVariable('BOOST_ROOT',
                  'Boost custom path', os.getenv('BOOST_ROOT', ''),
                  PathVariable.PathAccept),
     PathVariable('CUDA_ROOT',
                  'Cuda custom path', os.getenv('CUDA_ROOT', ''),
                  PathVariable.PathAccept),
     # Dependencies provided as submodule get different default
     PathVariable('GTEST_ROOT',
                  'Googletest custom path',
                  os.getenv('GTEST_ROOT', '#third-party/googletest/googletest'),
                  PathVariable.PathAccept),
     PathVariable('PYBIND11_ROOT',
                  'Pybind11 custom path',
                  os.getenv('PYBIND11_ROOT', '#third-party/pybind11/include'),
                  PathVariable.PathAccept),
     PathVariable('EXPOLIT_ROOT',
                  'Expolit custom path',
                  os.getenv('EXPOLIT_ROOT', '#third-party/expolit/include'),
                  PathVariable.PathAccept),
 
     # Executables
     ('CXX', 'Compiler', os.getenv('CXX', 'g++')),
     ('MPICXX', 'MPI Compiler wrapper', os.getenv('MPICXX', 'mpicxx')),
     ('py_exec', 'Python executable', 'python3'),
 
     # Compiler flags
     ('CXXFLAGS', 'C++ compiler flags', os.getenv('CXXFLAGS', "")),
 
     # Cosmetic
     BoolVariable('verbose', 'Activate verbosity', False),
     BoolVariable('color', 'Color the non-verbose compilation output', False),
 
     # Tamaas components
     BoolVariable('build_doc', 'Build documentation', False),
     BoolVariable('build_tests', 'Build test suite', False),
     BoolVariable('build_python', 'Build python wrapper', True),
 
     # Documentation
     ListVariable('doc_builders', 'Generated documentation formats',
                  default='html', names=Split("html man")),  # TODO include latex
 
     # Dependencies
     BoolVariable('use_googletest', 'Build tests using GTest', False),
     BoolVariable('use_mpi', 'Builds multi-process parallelism', False),
 
     # Distribution options
     BoolVariable('strip_info', 'Strip binary of added information', False),
     BoolVariable('build_static_lib', "Build a static libTamaas", False),
 
     # Type variables
     EnumVariable('real_type', 'Type for real precision variables', 'double',
                  allowed_values=('double', 'long double')),
     EnumVariable('integer_type', 'Type for integer variables', 'int',
                  allowed_values=('int', 'long')),
 )
 
 # Set variables of environment
 vars.Update(main_env)
 help_text = vars.GenerateHelpText(main_env)
 help_text += """
 Commands:
     scons [build] [options]...                            Compile Tamaas (and additional modules/tests)
     scons install [prefix=/your/prefix] [options]...      Install Tamaas to prefix
     scons dev                                             Install symlink to Tamaas python module (useful to development purposes)
     scons test                                            Run tests with pytest
     scons doc                                             Compile documentation with Doxygen and Sphinx+Breathe
     scons archive                                         Create a gzipped archive from source
 """  # noqa
 Help(help_text)
 
 # Save all options, not just those that differ from default
 with open('build-setup.conf', 'w') as setup:
     for option in vars.options:
         setup.write("# " + option.help.replace('\n', '\n# ') + "\n")
         setup.write("{} = '{}'\n".format(option.key, main_env[option.key]))
 
 main_env['should_configure'] = \
     not main_env.GetOption('clean') and not main_env.GetOption('help')
 
 build_type = main_env['build_type']
 build_dir = 'build-${build_type}'
 main_env['build_dir'] = main_env.Dir(build_dir)
 
 
 # Query the MAJOR.MINOR version of the interpreter used for the bindings
 if main_env['build_python']:
     # `sysconfig` replaces `distutils.sysconfig`: distutils was removed from
     # the standard library in Python 3.12
     args = (main_env.subst("${py_exec} -c").split()
             + ["from sysconfig import get_python_version;"
                "print(get_python_version())"])
     # Strip the trailing newline written by print() in the child process
     main_env['py_version'] = check_output(args).decode().strip()
 
 # Printing some build infos
 if main_env['should_configure']:
     print_build_info(main_env)
 
 verbose = main_env['verbose']
 
 # Remove colors if not set
 if not main_env['color']:
     for key in colors:
         colors[key] = ''
 
 if not verbose:
     main_env['CXXCOMSTR'] = main_env['SHCXXCOMSTR'] = \
         u'{0}[Compiling ($SHCXX)] {1}$SOURCE'.format(colors['green'],
                                                      colors['end'])
     main_env['LINKCOMSTR'] = main_env['SHLINKCOMSTR'] = \
         u'{0}[Linking] {1}$TARGET'.format(colors['purple'],
                                           colors['end'])
     main_env['ARCOMSTR'] = u'{}[Ar]{} $TARGET'.format(colors['purple'],
                                                       colors['end'])
     main_env['RANLIBCOMSTR'] = \
         u'{}[Randlib]{} $TARGET'.format(colors['purple'],
                                         colors['end'])
     main_env['PRINT_CMD_LINE_FUNC'] = pretty_cmd_print
     main_env['INSTALLSTR'] = \
         u'{}[Installing] {}$SOURCE to $TARGET'.format(colors['blue'],
                                                       colors['end'])
 
 # Include paths
 main_env.AppendUnique(CPPPATH=['#/src',
                                '#/src/core',
-                               '#/src/mpi',
                                '#/src/bem',
                                '#/src/surface',
-                               '#/src/python',
                                '#/src/percolation',
                                '#/src/model',
                                '#/src/model/elasto_plastic',
                                '#/src/solvers',
                                '#/src/gpu',
                                '#/python'])
 
 # Changing the shared object extension
 main_env['SHOBJSUFFIX'] = '.o'
 
+# Variables for clarity
+main_env['use_cuda'] = main_env['backend'] == "cuda"
+main_env['use_fftw'] = not main_env['use_cuda']
+
 # GCC is required as host compiler when the CUDA backend is activated
 if main_env['backend'] == "cuda" and "g++" not in main_env['CXX']:
     raise StopError('GCC should be used when compiling with CUDA')
 
 # OpenMP flags - compiler dependent
 omp_flags = {
     "g++": ["-fopenmp"],
     "clang++": ["-fopenmp"],
     "icpc": ["-qopenmp"]
 }
 
 
 def cxx_alias(cxx):
     """Return the key of ``omp_flags`` identifying the compiler ``cxx``.
 
     ``cxx`` may be a bare name, a versioned name or a full path (for example
     ``/usr/bin/g++-9``): a known compiler name is searched as a substring.
     Longer names are tried first because "g++" is itself a substring of
     "clang++", which would otherwise be reported as "g++".
 
     Raises ``StopError`` when no known compiler name is found in ``cxx``.
     """
     for alias in sorted(omp_flags, key=len, reverse=True):
         if alias in cxx:
             return alias
 
     raise StopError('Unsupported compiler: ' + cxx)
 
 
 cxx = cxx_alias(main_env['CXX'])
 
 # Setting main compilation flags
 main_env['CXXFLAGS'] = Split(main_env['CXXFLAGS'])
 main_env['LINKFLAGS'] = main_env['CXXFLAGS']
 main_env.AppendUnique(
     CXXFLAGS=Split('-std=c++14 -Wall -Wextra'),
     CPPDEFINES={
         'TAMAAS_LOOP_BACKEND': 'TAMAAS_LOOP_BACKEND_${backend.upper()}',
         'TAMAAS_FFTW_BACKEND': 'TAMAAS_FFTW_BACKEND_${fftw_threads.upper()}'
     },
 )
 
 if main_env['backend'] != 'cuda':
     main_env.AppendUnique(CXXFLAGS=['-pedantic'])
 
 # Adding OpenMP flags
 if main_env['backend'] == 'omp':
     main_env.AppendUnique(CXXFLAGS=omp_flags[cxx])
     main_env.AppendUnique(LINKFLAGS=omp_flags[cxx])
 else:
     main_env.AppendUnique(CXXFLAGS=['-Wno-unknown-pragmas'])
 
 # Correct bug in clang?
 if main_env['backend'] == 'omp' and cxx == "clang++":
     main_env.AppendUnique(LIBS=["atomic"])
 elif main_env['backend'] == 'tbb':
     main_env.AppendUnique(LIBS=['tbb'])
 
 # Manage MPI compiler
 if main_env['use_mpi']:
     main_env['CXX'] = '$MPICXX'
     main_env.AppendUnique(CPPDEFINES=['TAMAAS_USE_MPI'])
     main_env.AppendUnique(CXXFLAGS=['-Wno-cast-function-type'])
 
 # Flags and options
 if main_env['build_type'] == 'debug':
     main_env.AppendUnique(CPPDEFINES=['TAMAAS_DEBUG'])
 
 # Define the scalar types
 main_env.AppendUnique(CPPDEFINES={'TAMAAS_REAL_TYPE': '${real_type}',
                                   'TAMAAS_INT_TYPE': '${integer_type}'})
 
 # Compilation flags
 cxxflags_dict = {
     "debug": Split("-g -O0"),
     "profiling": Split("-g -O3 -fno-omit-frame-pointer"),
     "release": Split("-O3")
 }
 
 if main_env['sanitizer'] != 'none':
     if main_env['backend'] == 'cuda':
         raise StopError(
             "Sanitizers with cuda are not yet supported!")
     cxxflags_dict[build_type].append('-fsanitize=${sanitizer}')
 
 main_env.AppendUnique(CXXFLAGS=cxxflags_dict[build_type])
 main_env.AppendUnique(SHLINKFLAGS=cxxflags_dict[build_type])
 main_env.AppendUnique(LINKFLAGS=cxxflags_dict[build_type])
 
 if main_env['should_configure']:
     basic_checks(main_env)
     detect_dependencies(main_env)
 
 # Writing information file
 main_env.Tool('textfile')
 main_env['SUBST_DICT'] = get_git_subst()
 
 # Empty values if requested
 if main_env['strip_info']:
     for k in main_env['SUBST_DICT']:
         main_env['SUBST_DICT'][k] = ""
 
 # Substitution of environment file
 main_env['SUBST_DICT'].update({
     '@build_type@': '$build_type',
     '@build_dir@': '${build_dir.abspath}',
     '@build_version@': '$version',
     '@backend@': '$backend',
 })
 
 # Environment file content
 env_content = """export PYTHONPATH=@build_dir@/python:$$PYTHONPATH
 export LD_LIBRARY_PATH=@build_dir@/src:$$LD_LIBRARY_PATH
 """
 
 # Writing environment file
 env_file = main_env.Textfile(
     main_env.File('tamaas_environment.sh', main_env['build_dir']),
     env_content)
 
 
 # Default targets
 build_targets = ['build-cpp', env_file]
 install_targets = ['install-lib']
 
 if main_env._get_major_minor_revision(scons_version)[0] >= 4:
     main_env.Tool('compilation_db')
     main_env.CompilationDatabase(PRINT_CMD_LINE_FUNC=pretty_cmd_print)
 
 # Building Tamaas library
 Export('main_env')
 main_env.SubDirectory('src')
 
 # Building Tamaas extra components
 for dir in ['python', 'tests']:
     if main_env['build_{}'.format(dir)] and not main_env.GetOption('help'):
         main_env.SubDirectory(dir)
         build_targets.append('build-{}'.format(dir))
 
 # Building API + Sphinx documentation if requested
 if main_env['build_doc']:
     main_env.SubDirectory('doc')
     main_env.Alias('doc', 'build-doc')
     install_targets.append('install-doc')
 else:
     dummy_command(main_env, 'doc', 'Command "doc" does not do anything'
                   ' without documentation activated ("build_doc=True")')
 
 # Define dummy dev command when python is deactivated
 if not main_env['build_python']:
     dummy_command(main_env, 'dev', 'Command "dev" does not do anything'
                   + ' without python activated ("build_python=True")')
 else:
     install_targets.append('install-python')
 
 # Define dummy test command when tests are deactivated
 if not main_env['build_tests']:
     dummy_command(main_env, 'test', 'Command "test" does not do anything'
                   + ' without tests activated ("build_tests=True")')
 
 # Definition of target aliases, a.k.a. sub-commands
 main_env.Alias('build', build_targets)
 
 # Define proper install targets
 main_env.Alias('install', install_targets)
 
 # Default target is to build stuff
 main_env.Default('build')
 
 # Building a tar archive
 archive = main_env.Command(
     'tamaas-${version}.tar.gz',
     '',
     ('git archive '
      '--format=tar.gz '
      '--prefix=tamaas/ '
      '-o $TARGET HEAD'),
 )
 main_env.Alias('archive', archive)
diff --git a/src/SConscript b/src/SConscript
index 222cb32..8fc8240 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -1,158 +1,146 @@
 # -*- mode:python; coding: utf-8 -*-
 # vim: set ft=python:
 
 # @file
 # LICENSE
 #
 # Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
 # Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published
 # by the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 from SCons.Script import Import, Export
 
 
 def prepend(env, path, list):
     """Return one ``env.File(name, path)`` node per name in ``list``,
     in the same order."""
     nodes = []
     for name in list:
         nodes.append(env.File(name, path))
     return nodes
 
 
 Import('main_env')
 env = main_env.Clone()
 env.AddMethod(prepend, 'PrependDir')
 
 # Core
 core_list = """
 fft_engine.cpp
-fftw_engine.cpp
 grid.cpp
 grid_hermitian.cpp
 statistics.cpp
 tamaas.cpp
 loop.cpp
 computes.cpp
 logger.cpp
 mpi_interface.cpp
 """.split()
 core_list = env.PrependDir('core', core_list)
 
+if env['use_fftw']:
+    core_list += ['core/fftw/fftw_engine.cpp']
+
+    if env['use_mpi']:
+        core_list += ['core/fftw/mpi/fftw_mpi_engine.cpp']
+
+if env['use_cuda']:
+    core_list += ['core/cuda/cufft_engine.cpp']
+
 info_file = env.Substfile('tamaas_info.cpp', 'tamaas_info.cpp.in')
 core_list.append(info_file)
 
 # Lib roughcontact
 generator_list = """
 surface_generator.cpp
 surface_generator_filter.cpp
 surface_generator_random_phase.cpp
 isopowerlaw.cpp
 regularized_powerlaw.cpp
 """.split()
 generator_list = env.PrependDir('surface', generator_list)
 
 # Lib PERCOLATION
 percolation_list = """
 flood_fill.cpp
 """.split()
 percolation_list = env.PrependDir('percolation', percolation_list)
 
 # Model
 model_list = """
 model.cpp
 model_factory.cpp
 model_type.cpp
 model_template.cpp
 integral_operator.cpp
 be_engine.cpp
 westergaard.cpp
 elastic_functional.cpp
 meta_functional.cpp
 adhesion_functional.cpp
 volume_potential.cpp
 kelvin.cpp
 mindlin.cpp
 boussinesq.cpp
 hooke.cpp
 
 elasto_plastic/isotropic_hardening.cpp
 elasto_plastic/residual.cpp
 
 integration/element.cpp
 """.split()
 model_list = env.PrependDir('model', model_list)
 
 # Solvers
 solvers_list = """
 contact_solver.cpp
 polonsky_keer_rey.cpp
 kato_saturated.cpp
 kato.cpp
 beck_teboulle.cpp
 condat.cpp
 polonsky_keer_tan.cpp
 ep_solver.cpp
 dfsane_solver.cpp
 epic.cpp
 """.split()
 solvers_list = env.PrependDir('solvers', solvers_list)
 
-# GPU API
-gpu_list = """
-cufft_engine.cpp
-""".split()
-gpu_list = env.PrependDir('gpu', gpu_list)
-
-# MPI API
-mpi_list = """
-fftw_mpi_engine.cpp
-""".split()
-mpi_list = env.PrependDir('mpi', mpi_list)
-
 # Assembling total list
 rough_contact_list = \
   core_list + generator_list + percolation_list + model_list + solvers_list
 
-# Adding GPU if needed
-if env['backend'] == 'cuda':
-    rough_contact_list += gpu_list
-
-# Adding MPI if needed
-if env['use_mpi']:
-    rough_contact_list += mpi_list
-
 # Adding extra warnings for Tamaas base lib
 env.AppendUnique(CXXFLAGS=['-Wextra'])
 
 # Allowing libTamaas.so to find libs in the same directory
 env.AppendUnique(RPATH=["'$$$$ORIGIN'"])
 
 # Build static library for packaging
 if env['build_static_lib']:
     env.AppendUnique(CXXFLAGS='-fPIC')
     libTamaas = env.StaticLibrary('Tamaas', rough_contact_list)
 # Build shared library (default)
 else:
     libTamaas = env.SharedLibrary('Tamaas', rough_contact_list,
                                   SHLIBVERSION=env['version'])
 
 
 # Specify install target to install lib
 lib_prefix = env.Dir('lib', env['prefix'])
 lib_install = env.InstallVersionedLib(target=lib_prefix,
                                       source=libTamaas)
 
 # Defining alias targets
 main_env.Alias('build-cpp', libTamaas)
 main_env.Alias('install-lib', lib_install)
 
 # Export target for use in python builds
 Export('libTamaas')
diff --git a/src/gpu/cufft_engine.cpp b/src/core/cuda/cufft_engine.cpp
similarity index 100%
rename from src/gpu/cufft_engine.cpp
rename to src/core/cuda/cufft_engine.cpp
diff --git a/src/gpu/cufft_engine.hh b/src/core/cuda/cufft_engine.hh
similarity index 100%
rename from src/gpu/cufft_engine.hh
rename to src/core/cuda/cufft_engine.hh
diff --git a/src/gpu/unified_allocator.hh b/src/core/cuda/unified_allocator.hh
similarity index 100%
rename from src/gpu/unified_allocator.hh
rename to src/core/cuda/unified_allocator.hh
diff --git a/src/core/fft_engine.cpp b/src/core/fft_engine.cpp
index d37c81a..daece46 100644
--- a/src/core/fft_engine.cpp
+++ b/src/core/fft_engine.cpp
@@ -1,60 +1,59 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #include "fft_engine.hh"
 
 #if TAMAAS_USE_CUDA
-#include "cufft_engine.hh"
+#include "cuda/cufft_engine.hh"
 #else
-#include "fftw_engine.hh"
-#endif
-
+#include "fftw/fftw_engine.hh"
 #ifdef TAMAAS_USE_MPI
-#include "fftw_mpi_engine.hh"
+#include "fftw/mpi/fftw_mpi_engine.hh"
 #include "mpi_interface.hh"
 #endif
+#endif
 
 /* -------------------------------------------------------------------------- */
 namespace tamaas {
 
 /// Create the FFT engine selected at compile time, forwarding `flags` to its
 /// constructor.
 std::unique_ptr<FFTEngine> FFTEngine::makeEngine(unsigned int flags) {
 // Log the chosen engine type at debug level, then return a new instance of it
 #define inst(x)                                                                \
   do {                                                                         \
     Logger().get(LogLevel::debug) << TAMAAS_DEBUG_MSG("[" #x "] Init");        \
     return std::make_unique<x>(flags);                                         \
   } while (0)
 
 // The branches follow the precedence of the includes at the top of this file:
 // with CUDA enabled only the cuFFT engine is declared, so CUDA has to be
 // tested before MPI, otherwise a CUDA + MPI build refers to undeclared names.
 #if TAMAAS_USE_CUDA
   inst(CuFFTEngine);
 #elif defined(TAMAAS_USE_MPI)
   // The MPI engine is only selected when mpi::size() is not 1
   if (mpi::size() != 1)
     inst(FFTWMPIEngine);
   else
     inst(FFTWEngine);
 #else
   inst(FFTWEngine);
 #endif
 
 #undef inst
 }
 
 }  // namespace tamaas
diff --git a/src/core/fftw_allocator.hh b/src/core/fftw/fftw_allocator.hh
similarity index 100%
copy from src/core/fftw_allocator.hh
copy to src/core/fftw/fftw_allocator.hh
diff --git a/src/core/fftw_engine.cpp b/src/core/fftw/fftw_engine.cpp
similarity index 100%
rename from src/core/fftw_engine.cpp
rename to src/core/fftw/fftw_engine.cpp
diff --git a/src/core/fftw_engine.hh b/src/core/fftw/fftw_engine.hh
similarity index 97%
rename from src/core/fftw_engine.hh
rename to src/core/fftw/fftw_engine.hh
index 873fedf..3dfe0f8 100644
--- a/src/core/fftw_engine.hh
+++ b/src/core/fftw/fftw_engine.hh
@@ -1,105 +1,105 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
-#ifndef FFTW_ENGINE_H
-#define FFTW_ENGINE_H
+#ifndef FFTW_ENGINE_HH
+#define FFTW_ENGINE_HH
 /* -------------------------------------------------------------------------- */
 #include "fft_engine.hh"
-#include "fftw_interface.hh"
+#include "fftw/interface.hh"
 /* -------------------------------------------------------------------------- */
 namespace tamaas {
 
 /// FFT engine built on FFTW plans. A pair of plans is stored for each
 /// transform signature in the `plans` map.
 class FFTWEngine : public FFTEngine {
-private:
+protected:
   /// Pair of plans: `first` is executed by forwardImpl, `second` by
   /// backwardImpl
   using plan_t = std::pair<fftw::plan<Real>, fftw::plan<Real>>;
   /// Complex type given by the FFTW helper for Real
   using complex_t = fftw::helper<Real>::complex;
 
   /// Perform forward (R2C) transform
   template <UInt dim>
   void forwardImpl(const Grid<Real, dim>& real,
                    GridHermitian<Real, dim>& spectral);
   /// Perform backward (C2R) transform
   template <UInt dim>
   void backwardImpl(Grid<Real, dim>& real,
                     const GridHermitian<Real, dim>& spectral);
   /// Return the plans pair for a given transform signature
   plan_t& getPlans(key_t key);
 
 public:
   /// Initialize with flags
   explicit FFTWEngine(unsigned int flags = FFTW_ESTIMATE) noexcept
       : _flags(flags), plans() {}
 
   /// Forward transform of a 1D grid (delegates to forwardImpl)
   void forward(const Grid<Real, 1>& real,
                GridHermitian<Real, 1>& spectral) override {
     forwardImpl(real, spectral);
   }
 
   /// Forward transform of a 2D grid (delegates to forwardImpl)
   void forward(const Grid<Real, 2>& real,
                GridHermitian<Real, 2>& spectral) override {
     forwardImpl(real, spectral);
   }
 
   /// Backward transform of a 1D grid (delegates to backwardImpl)
   void backward(Grid<Real, 1>& real,
                 GridHermitian<Real, 1>& spectral) override {
     backwardImpl(real, spectral);
   }
 
   /// Backward transform of a 2D grid (delegates to backwardImpl)
   void backward(Grid<Real, 2>& real,
                 GridHermitian<Real, 2>& spectral) override {
     backwardImpl(real, spectral);
   }
 
   /// Flags given at construction
   unsigned int flags() const { return _flags; }
 
   /// Cast to FFTW complex type
   static auto cast(Complex* data) { return reinterpret_cast<complex_t*>(data); }
   /// Cast to FFTW complex type; the returned pointer is not const-qualified
   static auto cast(const Complex* data) {
     return const_cast<complex_t*>(reinterpret_cast<const complex_t*>(data));
   }
 
 protected:
   unsigned int _flags;            ///< FFTW flags
   std::map<key_t, plan_t> plans;  ///< plans corresponding to signatures
 };
 
 /* -------------------------------------------------------------------------- */
 template <UInt dim>
 void FFTWEngine::forwardImpl(const Grid<Real, dim>& real,
                              GridHermitian<Real, dim>& spectral) {
   auto& plans = getPlans(make_key(real, spectral));
   fftw::execute(plans.first, const_cast<Real*>(real.getInternalData()),
                 cast(spectral.getInternalData()));
 }
 
 /// Execute the second plan of the pair associated with the signature of
 /// (real, spectral), then scale the real grid by 1 / (number of points).
 template <UInt dim>
 void FFTWEngine::backwardImpl(Grid<Real, dim>& real,
                               const GridHermitian<Real, dim>& spectral) {
   auto& plan_pair = getPlans(make_key(real, spectral));
   auto* spectral_data = cast(spectral.getInternalData());
   fftw::execute(plan_pair.second, spectral_data, real.getInternalData());
   // Normalize
   const auto scale = 1. / real.getNbPoints();
   real *= scale;
 }
 
 }  // namespace tamaas
 #endif
diff --git a/src/core/fftw_allocator.hh b/src/core/fftw/interface.hh
similarity index 59%
copy from src/core/fftw_allocator.hh
copy to src/core/fftw/interface.hh
index c8b5932..03a9032 100644
--- a/src/core/fftw_allocator.hh
+++ b/src/core/fftw/interface.hh
@@ -1,52 +1,40 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
-#ifndef FFTW_ALLOCATOR_HH
-#define FFTW_ALLOCATOR_HH
-/* -------------------------------------------------------------------------- */
-#include <fftw3.h>
-#include <memory>
+#ifndef FFTW_INTERFACE_HH
+#define FFTW_INTERFACE_HH
 /* -------------------------------------------------------------------------- */
 
-namespace tamaas {
+#ifdef TAMAAS_USE_FFTW
+#include "interface_impl.hh"
+#include "mpi/interface.hh"
+
+namespace fftw = fftw_impl;
 
-/// Class allocating
-/// [SIMD](http://www.fftw.org/fftw3_doc/SIMD-alignment-and-fftw_005fmalloc.html#SIMD-alignment-and-fftw_005fmalloc)
-/// aligned memory
-template <typename T>
-struct FFTWAllocator {
-  /// Allocate memory
-  static T* allocate(std::size_t n) noexcept {
-    T* p = nullptr;
-    p = (T*)fftw_malloc(sizeof(T) * n);
-    return p;
-  }
+#else
+#include "interface_dummy.hh"
 
-  /// Free memory
-  static void deallocate(T* p, __attribute__((unused)) std::size_t n) noexcept {
-    fftw_free(p);
-  }
-};
+namespace fftw = fftw_dummy;
 
-}  // namespace tamaas
+#endif  // TAMAAS_USE_FFTW
 
-#endif  // FFTW_ALLOCATOR_HH
+#endif  // FFTW_INTERFACE_HH
diff --git a/src/core/fftw_allocator.hh b/src/core/fftw/interface_dummy.hh
similarity index 59%
rename from src/core/fftw_allocator.hh
rename to src/core/fftw/interface_dummy.hh
index c8b5932..a5941bd 100644
--- a/src/core/fftw_allocator.hh
+++ b/src/core/fftw/interface_dummy.hh
@@ -1,52 +1,27 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
-#ifndef FFTW_ALLOCATOR_HH
-#define FFTW_ALLOCATOR_HH
+#ifndef FFTW_INTERFACE_DUMMY_HH
+#define FFTW_INTERFACE_DUMMY_HH
 /* -------------------------------------------------------------------------- */
-#include <fftw3.h>
-#include <memory>
-/* -------------------------------------------------------------------------- */
-
-namespace tamaas {
-
-/// Class allocating
-/// [SIMD](http://www.fftw.org/fftw3_doc/SIMD-alignment-and-fftw_005fmalloc.html#SIMD-alignment-and-fftw_005fmalloc)
-/// aligned memory
-template <typename T>
-struct FFTWAllocator {
-  /// Allocate memory
-  static T* allocate(std::size_t n) noexcept {
-    T* p = nullptr;
-    p = (T*)fftw_malloc(sizeof(T) * n);
-    return p;
-  }
-
-  /// Free memory
-  static void deallocate(T* p, __attribute__((unused)) std::size_t n) noexcept {
-    fftw_free(p);
-  }
-};
-
-}  // namespace tamaas
-
+namespace fftw_dummy {}  // placeholder target for the `fftw` alias
-#endif  // FFTW_ALLOCATOR_HH
+#endif  // FFTW_INTERFACE_DUMMY_HH
diff --git a/src/core/fftw_interface.hh b/src/core/fftw/interface_impl.hh
similarity index 97%
rename from src/core/fftw_interface.hh
rename to src/core/fftw/interface_impl.hh
index 2d6d71d..bcc3556 100644
--- a/src/core/fftw_interface.hh
+++ b/src/core/fftw/interface_impl.hh
@@ -1,209 +1,206 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
-#ifndef FFTW_INTERFACE
-#define FFTW_INTERFACE
+#ifndef FFTW_INTERFACE_IMPL_HH
+#define FFTW_INTERFACE_IMPL_HH
 /* -------------------------------------------------------------------------- */
 #include <cstddef>
+#include <fftw3.h>
 #include <functional>
 #include <numeric>
 #include <utility>
 
-#include <fftw3.h>
-#ifdef TAMAAS_USE_MPI
-#include <fftw3-mpi.h>
-#endif
-
-namespace fftw {
+namespace fftw_impl {
 template <typename T>
 struct helper;
 
 template <>
 struct helper<double> {
   using complex = fftw_complex;
   using plan = fftw_plan;
 
   static auto alloc_real(std::size_t size) { return fftw_alloc_real(size); }
   static auto alloc_complex(std::size_t size) {
     return fftw_alloc_complex(size);
   }
 };
 
 template <>
 struct helper<long double> {
   using complex = fftwl_complex;
   using plan = fftwl_plan;
 
   static auto alloc_real(std::size_t size) { return fftwl_alloc_real(size); }
   static auto alloc_complex(std::size_t size) {
     return fftwl_alloc_complex(size);
   }
 };
 
 template <typename T>
 inline auto free(T* ptr) {
   fftw_free(ptr);
 }
 
 inline auto destroy(fftw_plan plan) { fftw_destroy_plan(plan); }
 inline auto destroy(fftwl_plan plan) { fftwl_destroy_plan(plan); }
 inline auto init_threads() { return fftw_init_threads(); }
 inline auto plan_with_nthreads(int nthreads) {
   return fftw_plan_with_nthreads(nthreads);
 }
 inline auto cleanup_threads() { return fftw_cleanup_threads(); }
 
 /// Holder type for fftw plans
 template <typename T>
 struct plan {
   typename helper<T>::plan _plan;
 
   /// Create from plan
   explicit plan(typename helper<T>::plan _plan = nullptr) : _plan(_plan) {}
   /// Move constructor to avoid accidental plan destruction
   plan(plan&& o) : _plan(std::exchange(o._plan, nullptr)) {}
   /// Move operator
   plan& operator=(plan&& o) {
     _plan = std::exchange(o._plan, nullptr);
     return *this;
   }
   /// Destroy plan
   ~plan() noexcept {
     if (_plan)
-      fftw::destroy(_plan);
+      fftw_impl::destroy(_plan);
   }
 
   /// For seamless use with fftw api
   operator typename helper<T>::plan() const { return _plan; }
 };
 
 /// RAII helper for fftw_free
 template <typename T>
 struct ptr {
   T* _ptr;
 
   ~ptr() noexcept {
     if (_ptr)
-      fftw::free(_ptr);
+      fftw_impl::free(_ptr);
   }
 
   operator T*() { return _ptr; }
 };
 
 /* -------------------------------------------------------------------------- */
 
 inline auto plan_many_forward(int rank, const int* n, int howmany, double* in,
                               const int* inembed, int istride, int idist,
                               fftw_complex* out, const int* onembed,
                               int ostride, int odist, unsigned flags) {
   return fftw_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist,
                                 out, onembed, ostride, odist, flags);
 }
 
 inline auto plan_many_backward(int rank, const int* n, int howmany,
                                fftw_complex* in, const int* inembed,
                                int istride, int idist, double* out,
                                const int* onembed, int ostride, int odist,
                                unsigned flags) {
   return fftw_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist,
                                 out, onembed, ostride, odist, flags);
 }
 
 inline auto plan_1d_forward(int n, double* in, fftw_complex* out,
                             unsigned flags) {
   return fftw_plan_dft_r2c_1d(n, in, out, flags);
 }
 
 inline auto plan_1d_backward(int n, fftw_complex* in, double* out,
                              unsigned flags) {
   return fftw_plan_dft_c2r_1d(n, in, out, flags);
 }
 
 inline auto plan_2d_forward(int n0, int n1, double* in, fftw_complex* out,
                             unsigned flags) {
   return fftw_plan_dft_r2c_2d(n0, n1, in, out, flags);
 }
 
 inline auto plan_2d_backward(int n0, int n1, fftw_complex* out, double* in,
                              unsigned flags) {
   return fftw_plan_dft_c2r_2d(n0, n1, out, in, flags);
 }
 
 inline auto execute(fftw_plan plan) { fftw_execute(plan); }
 inline auto execute(fftw_plan plan, double* in, fftw_complex* out) {
   fftw_execute_dft_r2c(plan, in, out);
 }
 
 inline auto execute(fftw_plan plan, fftw_complex* in, double* out) {
   fftw_execute_dft_c2r(plan, in, out);
 }
 
 /* -------------------------------------------------------------------------- */
 
 inline auto plan_many_forward(int rank, const int* n, int howmany,
                               long double* in, const int* inembed, int istride,
                               int idist, fftwl_complex* out, const int* onembed,
                               int ostride, int odist, unsigned flags) {
   return fftwl_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist,
                                  out, onembed, ostride, odist, flags);
 }
 
 inline auto plan_many_backward(int rank, const int* n, int howmany,
                                fftwl_complex* in, const int* inembed,
                                int istride, int idist, long double* out,
                                const int* onembed, int ostride, int odist,
                                unsigned flags) {
   return fftwl_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist,
                                  out, onembed, ostride, odist, flags);
 }
 
 inline auto plan_1d_forward(int n, long double* in, fftwl_complex* out,
                             unsigned flags) {
   return fftwl_plan_dft_r2c_1d(n, in, out, flags);
 }
 
 inline auto plan_1d_backward(int n, fftwl_complex* in, long double* out,
                              unsigned flags) {
   return fftwl_plan_dft_c2r_1d(n, in, out, flags);
 }
 
 inline auto plan_2d_forward(int n0, int n1, long double* in, fftwl_complex* out,
                             unsigned flags) {
   return fftwl_plan_dft_r2c_2d(n0, n1, in, out, flags);
 }
 
 inline auto plan_2d_backward(int n0, int n1, fftwl_complex* out,
                              long double* in, unsigned flags) {
   return fftwl_plan_dft_c2r_2d(n0, n1, out, in, flags);
 }
 
 inline auto execute(fftwl_plan plan) { fftwl_execute(plan); }
 inline auto execute(fftwl_plan plan, long double* in, fftwl_complex* out) {
   fftwl_execute_dft_r2c(plan, in, out);
 }
 
 inline auto execute(fftwl_plan plan, fftwl_complex* in, long double* out) {
   fftwl_execute_dft_c2r(plan, in, out);
 }
-}  // namespace fftw
+
+}  // namespace fftw_impl
 
-#endif  // FFTW_INTERFACE
+#endif  // FFTW_INTERFACE_IMPL_HH
diff --git a/src/mpi/fftw_mpi_engine.cpp b/src/core/fftw/mpi/fftw_mpi_engine.cpp
similarity index 98%
rename from src/mpi/fftw_mpi_engine.cpp
rename to src/core/fftw/mpi/fftw_mpi_engine.cpp
index be8bc09..1d6df72 100644
--- a/src/mpi/fftw_mpi_engine.cpp
+++ b/src/core/fftw/mpi/fftw_mpi_engine.cpp
@@ -1,135 +1,135 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
-#include "fftw_mpi_engine.hh"
-#include "fftw_mpi_interface.hh"
+#include "fftw/mpi/fftw_mpi_engine.hh"
+#include "fftw/interface.hh"
 #include "logger.hh"
 #include "mpi_interface.hh"
 #include "partitioner.hh"
 #include <algorithm>
 /* -------------------------------------------------------------------------- */
 namespace tamaas {
 
 auto FFTWMPIEngine::local_size(const key_t& key) {
   const auto n = [&key]() {
     std::array<UInt, 2> sizes;
     std::copy_n(key.cbegin(), 2, sizes.begin());
     sizes[1] = sizes[1] / 2 + 1;  // hermitian size
     return sizes;
   }();
 
   const auto sizes = Partitioner<2>::local_size(n);
   return std::make_pair(Partitioner<2>::alloc_size(n, key[2]),
                         std::get<0>(sizes));
 }
 
 void FFTWMPIEngine::forward(const Grid<Real, 2>& real,
                             GridHermitian<Real, 2>& spectral) {
   const auto key = make_key(real, spectral);
   auto& plans = getPlans(key);
   auto& workspace = workspaces[key];
 
   // Ensure enough space for MPI overhead
   const auto sizes = local_size(key);
   spectral.reserve(sizes.first);
 
-  // Can't use iterators here because of size mistmatch in workspace
+  // Can't use iterators here because of size mismatch in workspace
 #pragma omp parallel for collapse(3)
   for (UInt i = 0; i < real.sizes()[0]; ++i)
     for (UInt j = 0; j < real.sizes()[1]; ++j)
       for (UInt k = 0; k < real.getNbComponents(); ++k)
         workspace(i, j, k) = real(i, j, k);
 
   fftw_mpi_execute_dft_r2c(plans.first, workspace.getInternalData(),
                            cast(spectral.getInternalData()));
 }
 
 void FFTWMPIEngine::backward(Grid<Real, 2>& real,
                              GridHermitian<Real, 2>& spectral) {
   auto key = make_key(real, spectral);
   auto& plans = getPlans(key);
   auto& workspace = workspaces[key];
 
   // Ensure enough space for MPI overhead
   const auto sizes = local_size(key);
   spectral.reserve(sizes.first);
 
   fftw_mpi_execute_dft_c2r(plans.second, cast(spectral.getInternalData()),
                            workspace.getInternalData());
 
-  // Can't use iterators here because of size mistmatch in worspace
+  // Can't use iterators here because of size mismatch in workspace
 #pragma omp parallel for collapse(3)
   for (UInt i = 0; i < real.sizes()[0]; ++i)
     for (UInt j = 0; j < real.sizes()[1]; ++j)
       for (UInt k = 0; k < real.getNbComponents(); ++k)
         real(i, j, k) = workspace(i, j, k);
 
   real *= (1. / real.getGlobalNbPoints());
 }
 
 FFTWMPIEngine::key_t
 FFTWMPIEngine::make_key(const Grid<Real, 2>& real,
                         const GridHermitian<Real, 2>& spectral) {
   auto key{FFTEngine::make_key(real, spectral)};
 
   // Reduce first dimension for total size
   key.front() = mpi::allreduce<operation::plus>(key.front());
 
   return key;
 }
 
 FFTWMPIEngine::plan_t& FFTWMPIEngine::getPlans(key_t key) {
   if (plans.find(key) != plans.end())
     return plans[key];
 
   const int rank = key.size() - 3;  // dimension of fft
   const int howmany = key[rank];
 
   std::vector<std::ptrdiff_t> n(rank);  // size of individual fft
   std::copy_n(key.begin(), rank, n.begin());
 
   fftw::ptr<Real> in{nullptr};
   fftw::ptr<complex_t> out{nullptr};
 
   const auto sizes = local_size(key);
 
   workspaces.emplace(key, Grid<Real, 2>({static_cast<UInt>(sizes.second),
                                          2 * (key[1] / 2 + 1)},
                                         howmany));
 
   auto& workspace = workspaces[key];
   workspace.reserve(2 * sizes.first);
 
   out._ptr = fftw::helper<Real>::alloc_complex(sizes.first);
 
   fftw::plan<Real> forward{fftw_mpi_plan_many_dft_r2c(
       rank, n.data(), howmany, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
       workspace.getInternalData(), out, mpi::comm::world(), flags())};
   fftw::plan<Real> backward{fftw_mpi_plan_many_dft_c2r(
       rank, n.data(), howmany, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
       out, workspace.getInternalData(), mpi::comm::world(), flags())};
 
   plans[key] = std::make_pair(std::move(forward), std::move(backward));
   return plans[key];
 }
 
 }  // namespace tamaas
diff --git a/src/mpi/fftw_mpi_engine.hh b/src/core/fftw/mpi/fftw_mpi_engine.hh
similarity index 97%
rename from src/mpi/fftw_mpi_engine.hh
rename to src/core/fftw/mpi/fftw_mpi_engine.hh
index d69766c..f2b44f6 100644
--- a/src/mpi/fftw_mpi_engine.hh
+++ b/src/core/fftw/mpi/fftw_mpi_engine.hh
@@ -1,69 +1,69 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #ifndef FFTW_MPI_ENGINE_HH
 #define FFTW_MPI_ENGINE_HH
 /* -------------------------------------------------------------------------- */
-#include "fftw_engine.hh"
-#include "fftw_interface.hh"
+#include "fftw/fftw_engine.hh"
+#include "fftw/interface.hh"
 #include "grid.hh"
 #include "grid_hermitian.hh"
 #include <map>
 /* -------------------------------------------------------------------------- */
 namespace tamaas {
 
 class FFTWMPIEngine : public FFTWEngine {
 public:
   using FFTWEngine::FFTWEngine;
 
   void forward(const Grid<Real, 1>& /*real*/,
                GridHermitian<Real, 1>& /*spectral*/) override {
     TAMAAS_EXCEPTION("FFTW/MPI does not support 1D transforms");
   }
   void backward(Grid<Real, 1>& /*real*/,
                 GridHermitian<Real, 1>& /*spectral*/) override {
     TAMAAS_EXCEPTION("FFTW/MPI does not support 1D transforms");
   }
 
   /// FFTW/MPI forward (r2c) transform
   void forward(const Grid<Real, 2>& real,
                GridHermitian<Real, 2>& spectral) override;
   /// FFTW/MPI backward (c2r) transform
   void backward(Grid<Real, 2>& real,
                 GridHermitian<Real, 2>& spectral) override;
 
 protected:
   /// Make a transform signature from a pair of grids
   static key_t make_key(const Grid<Real, 2>& real,
                         const GridHermitian<Real, 2>& spectral);
   /// Return the plans pair for a given transform signature
   plan_t& getPlans(key_t key);
   /// Get FFTW local sizes from an hermitian grid
   static auto local_size(const key_t& key);
 
 protected:
   /// Buffer for real data because of FFTW/MPI layout
   std::map<key_t, Grid<Real, 2>> workspaces;
 };
 
 }  // namespace tamaas
-#endif  // FFT_MPI_ENGINE_HH
+#endif  // FFTW_MPI_ENGINE_HH
diff --git a/src/mpi/fftw_mpi_interface.hh b/src/core/fftw/mpi/interface.hh
similarity index 96%
rename from src/mpi/fftw_mpi_interface.hh
rename to src/core/fftw/mpi/interface.hh
index b69c5db..3ebe5a8 100644
--- a/src/mpi/fftw_mpi_interface.hh
+++ b/src/core/fftw/mpi/interface.hh
@@ -1,69 +1,69 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
-#ifndef FFTW_MPI_INTERFACE
-#define FFTW_MPI_INTERFACE
+#ifndef FFTW_MPI_INTERFACE_HH
+#define FFTW_MPI_INTERFACE_HH
 /* -------------------------------------------------------------------------- */
 #include "mpi_interface.hh"
 #include <cstdlib>
 #include <numeric>
 #include <stdexcept>
 #include <tuple>
 #ifdef TAMAAS_USE_MPI
 #include <fftw3-mpi.h>
 #endif
 /* -------------------------------------------------------------------------- */
-namespace fftw {
+namespace fftw_impl {
 namespace mpi_dummy {
 inline void init() {}
 inline void cleanup() {}
 inline auto local_size_many(int rank, const std::ptrdiff_t* size,
                             std::ptrdiff_t howmany) {
   return std::make_tuple(howmany * std::accumulate(size, size + rank,
                                                    std::ptrdiff_t{1},
                                                    std::multiplies<void>()),
                          size[0], 0);
 }
 }  // namespace mpi_dummy
 
 #ifdef TAMAAS_USE_MPI
 namespace mpi_impl {
 inline void init() { fftw_mpi_init(); }
 inline void cleanup() { fftw_mpi_cleanup(); }
 inline auto local_size_many(int rank, const std::ptrdiff_t* size,
                             std::ptrdiff_t howmany) {
   if (rank < 2)
     throw std::domain_error("FFTW-MPI cannot be used for 1D transforms");
 
   std::ptrdiff_t local_n0, local_n0_offset;
   auto res = fftw_mpi_local_size_many(
       rank, size, howmany, FFTW_MPI_DEFAULT_BLOCK, ::tamaas::mpi::comm::world(),
       &local_n0, &local_n0_offset);
   return std::make_tuple(res, local_n0, local_n0_offset);
 }
 }  // namespace mpi_impl
 namespace mpi = mpi_impl;
 #else
 namespace mpi = mpi_dummy;
 #endif
-}  // namespace fftw
+}  // namespace fftw_impl
 #endif
diff --git a/src/mpi/partitioner.hh b/src/core/partitioner.hh
similarity index 99%
rename from src/mpi/partitioner.hh
rename to src/core/partitioner.hh
index 2880474..a523097 100644
--- a/src/mpi/partitioner.hh
+++ b/src/core/partitioner.hh
@@ -1,118 +1,118 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #ifndef PARTITIONER_HH
 #define PARTITIONER_HH
 /* -------------------------------------------------------------------------- */
-#include "fftw_mpi_interface.hh"
+#include "fftw/interface.hh"
 #include "grid.hh"
 #include "mpi_interface.hh"
 #include "tamaas.hh"
 #include <algorithm>
 #include <array>
 #include <cstdlib>
 #include <functional>
 /* -------------------------------------------------------------------------- */
 namespace tamaas {
 
 template <UInt dim>
 struct Partitioner {
   template <typename Container>
   static decltype(auto) global_size(Container local) {
     local.front() = mpi::allreduce<operation::plus>(local.front());
     return local;
   }
 
   template <typename T>
   static decltype(auto) global_size(const Grid<T, dim>& grid) {
     return global_size(grid.sizes());
   }
 
   template <typename Container>
   static decltype(auto) local_size(Container global) {
     if (dim == 1)
       return global;
 
     auto tup = fftw::mpi::local_size_many(dim, cast_size(global).data(), 1);
     global.front() = static_cast<UInt>(std::get<1>(tup));
     return global;
   }
 
   template <typename T>
   static decltype(auto) local_size(const Grid<T, dim>& grid) {
     return local_size(grid.sizes());
   }
 
   static decltype(auto) local_size(std::initializer_list<UInt> list) {
     std::array<UInt, dim> global;
     std::copy_n(list.begin(), dim, global.begin());
     return local_size(global);
   }
 
   template <typename Container>
   static decltype(auto) local_offset(const Container& global) {
     if (dim == 1)
       return std::size_t{0};
     auto tup = fftw::mpi::local_size_many(dim, cast_size(global).data(), 1);
     return static_cast<std::size_t>(std::get<2>(tup));
   }
 
   template <typename T>
   static decltype(auto) local_offset(const Grid<T, dim>& grid) {
     auto offset = local_offset(global_size(grid.sizes()));
     return offset * grid.getStrides().front();
   }
 
   static decltype(auto) local_offset(std::initializer_list<UInt> list) {
     std::array<UInt, dim> global;
     std::copy_n(list.begin(), dim, global.begin());
     return local_offset(global);
   }
 
   template <typename Container>
   static decltype(auto) cast_size(const Container& s) {
     std::array<std::ptrdiff_t, dim> n;
     std::copy_n(s.cbegin(), dim, n.begin());
     return n;
   }
 
   static decltype(auto) alloc_size(const std::array<UInt, dim>& global,
                                    UInt howmany) {
     if (dim == 1)
       return std::accumulate(global.begin(), global.end(), std::size_t{1},
                              std::multiplies<void>());
     auto tup =
         fftw::mpi::local_size_many(dim, cast_size(global).data(), howmany);
     return static_cast<std::size_t>(std::get<0>(tup));
   }
 
   template <typename T>
   static decltype(auto) gather(const Grid<T, dim>& send) {
     Grid<T, dim> result(global_size(send), send.getNbComponents());
     mpi::gather(send.getInternalData(), result.getInternalData(),
                 send.dataSize());
     return result;
   }
 };
 
 }  // namespace tamaas
 #endif
diff --git a/src/core/statistics.cpp b/src/core/statistics.cpp
index 10d8b5a..acbc22b 100644
--- a/src/core/statistics.cpp
+++ b/src/core/statistics.cpp
@@ -1,214 +1,214 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #include "statistics.hh"
-#include "fftw_engine.hh"
+#include "fft_engine.hh"
 #include "loop.hh"
 #include "static_types.hh"
 /* -------------------------------------------------------------------------- */
 namespace tamaas {
 
 template <UInt dim>
 Real Statistics<dim>::computeRMSHeights(Grid<Real, dim>& surface) {
   return std::sqrt(surface.var());
 }
 
 template <UInt dim>
 Real Statistics<dim>::computeSpectralRMSSlope(Grid<Real, dim>& surface) {
   const auto h_size =
       GridHermitian<Real, dim>::hermitianDimensions(surface.sizes());
   auto wavevectors =
       FFTEngine::template computeFrequencies<Real, dim, true>(h_size);
   wavevectors *= 2 * M_PI;  // need q for slopes
   const auto psd = computePowerSpectrum(surface);
 
   const Real rms_slope_mean = Loop::reduce<operation::plus>(
       [] CUDA_LAMBDA(VectorProxy<const Real, dim> q, const Complex& psd_val) {
         // Checking if we're in the zone that does not have hermitian symmetry
         if (std::abs(q.back()) < 1e-15)
           return q.l2squared() * psd_val.real();
         else
           return 2 * q.l2squared() * psd_val.real();
       },
       range<VectorProxy<const Real, dim>>(wavevectors), psd);
 
   return std::sqrt(rms_slope_mean);
 }
 
 /* -------------------------------------------------------------------------- */
 
 template <UInt dim>
 GridHermitian<Real, dim>
 Statistics<dim>::computePowerSpectrum(Grid<Real, dim>& surface) {
   const auto h_size =
       GridHermitian<Real, dim>::hermitianDimensions(surface.sizes());
   GridHermitian<Real, dim> psd(h_size, surface.getNbComponents());
 
   FFTEngine::makeEngine()->forward(surface, psd);
 
   Real factor = 1. / surface.getGlobalNbPoints();
 
   // Squaring the fourier transform of surface and normalizing
   Loop::loop(
       [factor] CUDA_LAMBDA(Complex & c) {
         c *= factor;
         c *= conj(c);
       },
       psd);
 
   return psd;
 }
 
 /* -------------------------------------------------------------------------- */
 
 template <UInt dim>
 Grid<Real, dim>
 Statistics<dim>::computeAutocorrelation(Grid<Real, dim>& surface) {
   Grid<Real, dim> acf(surface.sizes(), surface.getNbComponents());
   auto psd = computePowerSpectrum(surface);
 
   FFTEngine::makeEngine()->backward(acf, psd);
   acf *= acf.getGlobalNbPoints();
   return acf;
 }
 
 /* -------------------------------------------------------------------------- */
 template <UInt dim>
 Real Statistics<dim>::contact(const Grid<Real, dim>& tractions,
                               UInt perimeter) {
   Real points = 0;
   UInt nc = tractions.getNbComponents();
 
   switch (nc) {
   case 1:
     points = Loop::reduce(
         [] CUDA_LAMBDA(const Real& t) -> Real { return t > 0; }, tractions);
     break;
   case 2:
     points = Loop::reduce(
         [] CUDA_LAMBDA(VectorProxy<const Real, 2> t) -> Real {
           return t.back() > 0;
         },
         range<VectorProxy<const Real, 2>>(tractions));
     break;
   case 3:
     points = Loop::reduce(
         [] CUDA_LAMBDA(VectorProxy<const Real, 3> t) -> Real {
           return t.back() > 0;
         },
         range<VectorProxy<const Real, 3>>(tractions));
     break;
   default:
     TAMAAS_EXCEPTION("Invalid number of components in traction");
   }
 
   auto area = points / tractions.getNbPoints();
 
   if (dim == 1)
     perimeter = 0;
 
   // Correction from Yastrebov et al. (Trib. Intl., 2017)
   // 10.1016/j.triboint.2017.04.023
   return area -
          (M_PI - 1 + std::log(2)) / (24. * tractions.getNbPoints()) * perimeter;
 }
 
 /* -------------------------------------------------------------------------- */
 namespace {
 template <UInt dim>
 class moment_helper {
 public:
   moment_helper(const std::array<UInt, dim>& exp) : exponent(exp) {}
 
   CUDA_LAMBDA Complex operator()(VectorProxy<Real, dim> q,
                                  const Complex& phi) const {
     Real mul = 1;
     for (UInt i = 0; i < dim; ++i)
       mul *= std::pow(q(i), exponent[i]);
 
     // Do not duplicate everything from hermitian symmetry
     if (std::abs(q.back()) < 1e-15)
       return mul * phi;
     else
       return 2 * mul * phi;
   }
 
 private:
   std::array<UInt, dim> exponent;
 };
 }  // namespace
 
 template <>
 std::vector<Real> Statistics<1>::computeMoments(Grid<Real, 1>& surface) {
   constexpr UInt dim = 1;
   std::vector<Real> moments(3);
   const auto psd = computePowerSpectrum(surface);
   auto wavevectors =
       FFTEngine::template computeFrequencies<Real, dim, true>(psd.sizes());
   // we don't multiply by 2 pi because moments are computed with k
   moments[0] = Loop::reduce<operation::plus>(moment_helper<dim>{{{0}}},
                                              range<PVector>(wavevectors), psd)
                    .real();
   moments[1] = Loop::reduce<operation::plus>(moment_helper<dim>{{{2}}},
                                              range<PVector>(wavevectors), psd)
                    .real();
   moments[2] = Loop::reduce<operation::plus>(moment_helper<dim>{{{4}}},
                                              range<PVector>(wavevectors), psd)
                    .real();
   return moments;
 }
 
 template <>
 std::vector<Real> Statistics<2>::computeMoments(Grid<Real, 2>& surface) {
   constexpr UInt dim = 2;
   std::vector<Real> moments(3);
   const auto psd = computePowerSpectrum(surface);
   auto wavevectors =
       FFTEngine::template computeFrequencies<Real, dim, true>(psd.sizes());
   // we don't multiply by 2 pi because moments are computed with k
   moments[0] = Loop::reduce<operation::plus>(moment_helper<dim>{{{0, 0}}},
                                              range<PVector>(wavevectors), psd)
                    .real();
   auto m02 = Loop::reduce<operation::plus>(moment_helper<dim>{{{0, 2}}},
                                            range<PVector>(wavevectors), psd)
                  .real();
   auto m20 = Loop::reduce<operation::plus>(moment_helper<dim>{{{2, 0}}},
                                            range<PVector>(wavevectors), psd)
                  .real();
   moments[1] = 0.5 * (m02 + m20);
   auto m22 = Loop::reduce<operation::plus>(moment_helper<dim>{{{2, 2}}},
                                            range<PVector>(wavevectors), psd)
                  .real();
   auto m40 = Loop::reduce<operation::plus>(moment_helper<dim>{{{4, 0}}},
                                            range<PVector>(wavevectors), psd)
                  .real();
   auto m04 = Loop::reduce<operation::plus>(moment_helper<dim>{{{0, 4}}},
                                            range<PVector>(wavevectors), psd)
                  .real();
   moments[2] = (3 * m22 + m40 + m04) / 3.;
   return moments;
 }
 
 template struct Statistics<1>;
 template struct Statistics<2>;
 
 }  // namespace tamaas
diff --git a/src/core/tamaas.cpp b/src/core/tamaas.cpp
index a58552d..695e6b5 100644
--- a/src/core/tamaas.cpp
+++ b/src/core/tamaas.cpp
@@ -1,105 +1,104 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 
 #include "tamaas.hh"
-#include "fftw_interface.hh"
-#include "fftw_mpi_interface.hh"
+#include "fftw/interface.hh"
 #include "logger.hh"
 #include "mpi_interface.hh"
 #if TAMAAS_LOOP_BACKEND == TAMAAS_LOOP_BACKEND_OMP
 #include <omp.h>
 #endif
 
 /* -------------------------------------------------------------------------- */
 
 namespace tamaas {
 
 /// Set up MPI, the loop threading backend and FFTW.
 /// @param num_threads requested number of threads; with the OpenMP loop
 ///        backend, 0 keeps the OpenMP default (omp_get_max_threads())
 /// Raises a tamaas exception (via TAMAAS_EXCEPTION) if FFTW fails to
 /// initialize its threads.
 void initialize(UInt num_threads) {
   // remembers across calls that the MPI warning below was already logged
   static bool has_warned = false;
 
   // thread level granted by MPI; stays `single` when MPI was already
   // initialized before this call, because init_thread is then skipped
   mpi::thread provided = mpi::thread::single;
 
   if (not mpi::initialized()) {
     mpi::init_thread(nullptr, nullptr, mpi::thread::multiple, &provided);
   }
 
   // FFTW threads are only set up when MPI granted more than `single`
   bool should_init_threads = (provided > mpi::thread::single);
 
 #if TAMAAS_LOOP_BACKEND == TAMAAS_LOOP_BACKEND_OMP
   if (num_threads)
     omp_set_num_threads(num_threads);  // set user-defined number of threads
   else
     num_threads = omp_get_max_threads();
 #else
   // without OpenMP any explicit request is clamped to one thread
   // NOTE(review): a request of 0 stays 0 here and is later handed to
   // fftw::plan_with_nthreads() when FFTW threads are enabled -- confirm
   // that this is intended
   if (num_threads != 0)
     num_threads = 1;
 #endif
 
 #if TAMAAS_FFTW_BACKEND != TAMAAS_FFTW_BACKEND_NONE
   // fftw::init_threads() is only called when threads are wanted; a falsy
   // return value is treated as a fatal error
   if (should_init_threads and (not fftw::init_threads())) {
     TAMAAS_EXCEPTION("FFTW could not initialize threads!");
   } else if (not should_init_threads)
     Logger().get(LogLevel::debug) << "not initializing FFTW threads\n";
 #endif
 
   // runs after the FFTW thread initialization above
   if (mpi::initialized()) {
     if (not has_warned) {
       Logger().get(LogLevel::warning) << "experimental MPI support\n";
       has_warned = true;
     }
     fftw::mpi::init();
   }
 
   if (should_init_threads) {
 #if TAMAAS_FFTW_BACKEND != TAMAAS_FFTW_BACKEND_NONE
     Logger().get(LogLevel::debug)
         << "initializing FFTW with " << num_threads << " threads\n";
     fftw::plan_with_nthreads(num_threads);
 #endif
   }
 }
 
 /* -------------------------------------------------------------------------- */
 
 /// Release the FFTW and MPI resources set up by initialize().
 /// Does nothing when MPI has already been finalized.
 void finalize() {
   if (not mpi::finalized()) {
     // Same guard as the FFTW thread setup in initialize(): the threaded
     // cleanup is compiled in whenever an FFTW threading backend is selected.
     // (An undefined macro in an #if evaluates to 0, so the guard must name
     // macros that tamaas.hh actually defines.)
 #if TAMAAS_FFTW_BACKEND != TAMAAS_FFTW_BACKEND_NONE
     fftw::cleanup_threads();
 #endif
     fftw::mpi::cleanup();
     mpi::finalize();
   }
 }
 
 namespace {
 /// Guard object tying initialize()/finalize() to the lifetime of a
 /// translation-unit-local static: constructed during static initialization,
 /// destroyed at program exit
 class entry_exit_points {
 public:
   entry_exit_points() { initialize(); }
   ~entry_exit_points() { finalize(); }
 };
 
 /// The single guard instance of this translation unit
 const entry_exit_points entry_exit_guard;
 }  // namespace
 
 }  // namespace tamaas
diff --git a/src/core/tamaas.hh b/src/core/tamaas.hh
index 46d2d4e..3b69cc7 100644
--- a/src/core/tamaas.hh
+++ b/src/core/tamaas.hh
@@ -1,190 +1,196 @@
 /**
  *  @mainpage Tamaas - A high-performance periodic contact library
  *
  *  @section Introduction
  *  Tamaas is a spectral-integral-equation based contact library. It is made
  *  with love to be fast and friendly!
  *
  *  @author Guillaume Anciaux <guillaume.anciaux@epfl.ch>
  *  @author Lucas Frérot <lucas.frerot@protonmail.com>
  *  @author Valentine Rey <valentine.rey@epfl.ch>
  *  @author Son Pham-Ba <son.phamba@epfl.ch>
  *
  *  @section License
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #ifndef TAMAAS_HH
 #define TAMAAS_HH
 /* -------------------------------------------------------------------------- */
-#define TAMAAS_USE_FFTW ! defined(TAMAAS_USE_CUDA)
+
+#ifndef TAMAAS_USE_CUDA
+#define TAMAAS_USE_FFTW
+#endif
+
+// Values for fftw backends
 #define TAMAAS_FFTW_BACKEND_OMP 2
 #define TAMAAS_FFTW_BACKEND_THREADS 2
 #define TAMAAS_FFTW_BACKEND_NONE 3
+// Values for thrust backends
 #define TAMAAS_LOOP_BACKEND_OMP 1
 #define TAMAAS_LOOP_BACKEND_TBB 2
 #define TAMAAS_LOOP_BACKEND_CPP 3
 #define TAMAAS_LOOP_BACKEND_CUDA 4
 // Default loop backend is OpenMP
 #ifndef TAMAAS_LOOP_BACKEND
 #define TAMAAS_LOOP_BACKEND TAMAAS_LOOP_BACKEND_OMP
 #endif
 // Default FFTW backend is none
 #ifndef TAMAAS_FFTW_BACKEND
 #define TAMAAS_FFTW_BACKEND TAMAAS_FFTW_BACKEND_NONE
 #endif
 // If the thrust device hasn't been set, set OpenMP
 #ifndef THRUST_DEVICE_SYSTEM
 #define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_OMP
 #endif
 /* -------------------------------------------------------------------------- */
 // Standard includes
 #include <exception>
 #include <iostream>
 #include <memory>
 #include <sstream>
 #include <string>
 #include <type_traits>
 #include <utility>
 /* -------------------------------------------------------------------------- */
 // Special thrust includes
 #include <thrust/complex.h>
 #include <thrust/random.h>
 
 #ifdef TAMAAS_USE_CUDA
-#include "unified_allocator.hh"
+#include "cuda/unified_allocator.hh"
 #endif
-#include "fftw_allocator.hh"
+#include "fftw/fftw_allocator.hh"
 
 /* -------------------------------------------------------------------------- */
 namespace tamaas {
 /* -------------------------------------------------------------------------- */
 
 /// Cuda specific definitions
 /// CUDA_LAMBDA marks a callable as usable from both device and host code
 #define CUDA_LAMBDA __device__ __host__
 
 /// Allocator used by the library: UnifiedAllocator when TAMAAS_USE_CUDA is
 /// defined, FFTWAllocator otherwise
 #ifdef TAMAAS_USE_CUDA
 template <typename T>
 using Allocator = UnifiedAllocator<T>;
 #else
 template <typename T>
 using Allocator = FFTWAllocator<T>;
 #endif
 
 /// Common types definitions
 // If type macros have not been set, put default values
 // (both macros can be overridden by defining them before this point,
 // e.g. from the compiler command line)
 #ifndef TAMAAS_REAL_TYPE
 #define TAMAAS_REAL_TYPE double
 #endif
 #ifndef TAMAAS_INT_TYPE
 #define TAMAAS_INT_TYPE int
 #endif
 
 using Real = TAMAAS_REAL_TYPE;           ///< default floating point type
 using Int = TAMAAS_INT_TYPE;             ///< default signed integer type
 // UInt is derived from Int, so the two always have matching width
 using UInt = std::make_unsigned_t<Int>;  ///< default unsigned integer type
 template <typename T>
 using complex = thrust::complex<T>;  ///< template complex wrapper
 using Complex = complex<Real>;       ///< default floating point complex type
 
 /// Defining random toolbox
 // distributions and engine are taken from thrust::random
 using ::thrust::random::normal_distribution;
 using ::thrust::random::uniform_real_distribution;
 using random_engine = ::thrust::random::default_random_engine;
 
 namespace detail {
 /// Recursive step: folds `Trait<Head>::value` into the accumulator `acc` with
 /// a logical "and", then continues with the remaining types
 template <bool acc, template <typename> class Trait, typename Head,
           typename... Tail>
 struct fold_trait_tail_rec
     : fold_trait_tail_rec<(acc and Trait<Head>::value), Trait,
                           Tail...>::type {};
 
 /// Terminal step: a single type is left, the result is the accumulator
 /// combined with that last trait value
 template <bool acc, template <typename> class Trait, typename Last>
 struct fold_trait_tail_rec<acc, Trait, Last>
     : std::integral_constant<bool, (acc and Trait<Last>::value)> {};
 }  // namespace detail
 
 /// `value` is true when `Trait<X>::value` is true for every type `X` of the
 /// (non-empty) pack
 template <template <typename> class Trait, typename... Ts>
 struct fold_trait : detail::fold_trait_tail_rec<true, Trait, Ts...> {};
 
 /* -------------------------------------------------------------------------- */
 /// initialize tamaas (0 threads => let OMP_NUM_THREADS decide)
 void initialize(UInt num_threads = 0);
 /// cleanup tamaas
 void finalize();
 /* -------------------------------------------------------------------------- */
 
 /// CUDA-compatible exchange function
 template <class U, class V = U>
 __device__ __host__ U exchange(U& obj, V&& new_value) {
   U old_value = std::move(obj);
   obj = std::forward<V>(new_value);
   return old_value;
 }
 
 /// Exception type of the library: a std::exception carrying a message string
 class Exception : public std::exception {
   std::string msg;  ///< text handed out by what()
 
 public:
   /// Build the exception from its message
   Exception(std::string mesg) : msg{std::move(mesg)} {}
 
   ~Exception() override = default;
 
   /// Message given at construction, as a C string owned by this object
   const char* what() const noexcept override { return msg.c_str(); }
 };
 /* -------------------------------------------------------------------------- */
 /// Enumeration of reduction operations
 enum class operation { plus, times, min, max };
 /* -------------------------------------------------------------------------- */
 }  // namespace tamaas
 /* -------------------------------------------------------------------------- */
 
 /// Convenience macros
 /// Stream fragment "<file>:<line>: <mesg>" followed by a newline; `mesg` may
 /// itself be a chain of `<<` insertions
 #define TAMAAS_DEBUG_MSG(mesg)                                                 \
   __FILE__ << ':' << __LINE__ << ": " << mesg << '\n'
 /// Throw a ::tamaas::Exception whose text is the TAMAAS_DEBUG_MSG of
 /// "FATAL: " followed by `mesg`.
 /// The stream type is fully qualified and the local has a macro-specific name
 /// so that the expansion works in any namespace and `mesg` cannot
 /// accidentally refer to the macro's own stream.
 /// The expansion is a brace block: a `;` written after the macro call is an
 /// extra empty statement.
 #define TAMAAS_EXCEPTION(mesg)                                                 \
   {                                                                            \
     ::std::stringstream tamaas_exception_sstr_;                                \
     tamaas_exception_sstr_ << TAMAAS_DEBUG_MSG("FATAL: " << mesg);             \
     throw ::tamaas::Exception(tamaas_exception_sstr_.str());                   \
   }
 
 /// Alias of TAMAAS_EXCEPTION
 #define SURFACE_FATAL(mesg) TAMAAS_EXCEPTION(mesg)
 
 // Debug-only checks: with TAMAAS_DEBUG defined they throw through
 // TAMAAS_EXCEPTION; otherwise both macros expand to nothing, so `cond` and
 // `reason` are not evaluated at all
 #if defined(TAMAAS_DEBUG)
 /// Throw if `cond` is false; the message is the stringified condition
 /// followed by " assert failed: " and `reason`
 #define TAMAAS_ASSERT(cond, reason)                                            \
   do {                                                                         \
     if (not(cond)) {                                                           \
       TAMAAS_EXCEPTION(#cond " assert failed: " << reason);                    \
     }                                                                          \
   } while (0)
 /// Unconditional exception that only exists in debug builds
 #define TAMAAS_DEBUG_EXCEPTION(reason) TAMAAS_EXCEPTION(reason)
 #else
 #define TAMAAS_ASSERT(cond, reason)
 #define TAMAAS_DEBUG_EXCEPTION(reason)
 #endif
 /// Generate `type& get<name>()` (non-const, returns a mutable reference to
 /// `var`) and `void set<name>(const type&)` (copy-assigns into `var`)
 #define TAMAAS_ACCESSOR(var, type, name)                                       \
   type& get##name() { return var; }                                            \
   void set##name(const type& new_var) { var = new_var; }
 
 /* -------------------------------------------------------------------------- */
 
 #endif  // TAMAAS_HH
diff --git a/src/model/volume_potential.hh b/src/model/volume_potential.hh
index 6dc1ed5..a134d18 100644
--- a/src/model/volume_potential.hh
+++ b/src/model/volume_potential.hh
@@ -1,139 +1,139 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #ifndef VOLUME_POTENTIAL_HH
 #define VOLUME_POTENTIAL_HH
 /* -------------------------------------------------------------------------- */
-#include "fftw_engine.hh"
+#include "fft_engine.hh"
 #include "grid_hermitian.hh"
 #include "grid_view.hh"
 #include "integral_operator.hh"
 #include "logger.hh"
 #include "model_type.hh"
 /* -------------------------------------------------------------------------- */
 #include <functional>
 /* -------------------------------------------------------------------------- */
 namespace tamaas {
 
 /// Trait type for component management: for a given derivative order, gives
 /// the number of source and output components as a function of the model
 /// type. Only the orders 0, 1 and 2 are specialized below.
 template <UInt derivative>
 struct derivative_traits;
 
 /// Order 0: `components` entries for both source and output
 template <>
 struct derivative_traits<0> {
   template <model_type type>
   static constexpr UInt source_components = model_type_traits<type>::components;
   template <model_type type>
   static constexpr UInt out_components = model_type_traits<type>::components;
 };
 
 /// Order 1: `voigt` entries for the source, `components` for the output
 template <>
 struct derivative_traits<1> {
   template <model_type type>
   static constexpr UInt source_components = model_type_traits<type>::voigt;
   template <model_type type>
   static constexpr UInt out_components = model_type_traits<type>::components;
 };
 
 /// Order 2: `voigt` entries for both source and output
 template <>
 struct derivative_traits<2> {
   template <model_type type>
   static constexpr UInt source_components = model_type_traits<type>::voigt;
   template <model_type type>
   static constexpr UInt out_components = model_type_traits<type>::voigt;
 };
 
 /**
  * @brief Volume potential operator class. Applies the operators for computation
  * of displacements and strains due to residual/eigen strains
  *
  * @tparam type model type; selects the dimensions used for the wavevectors
  * and the Fourier buffers through model_type_traits
  */
 template <model_type type>
 class VolumePotential : public IntegralOperator {
   using trait = model_type_traits<type>;
 
 protected:
   /// Predicate on a layer index, used to select the layers to process
   using filter_t = std::function<bool(UInt)>;
 
 public:
   VolumePotential(Model* model);
 
   /// Update from model (does nothing)
   void updateFromModel() override {}
 
   /// Kind
   IntegralOperator::kind getKind() const override {
     return IntegralOperator::neumann;
   }
 
   /// Type
   model_type getType() const override;
 
   /// Apply to all of the source layers
   // forwards to applyIf with a predicate accepting every layer; applyIf is
   // not declared in this class (presumably inherited -- TODO confirm)
   void apply(GridBase<Real>& input, GridBase<Real>& output) const override {
     applyIf(input, output, [](UInt) { return true; });
   }
 
 protected:
   /// Transform source layer-by-layer
   void transformSource(GridBase<Real>& in, filter_t pred) const;
   /// Transform all source
   void transformSource(GridBase<Real>& in) const {
     transformSource(in, [](auto) { return true; });
   }
   /// Transform output layer-by-layer
   /// (`func(out_buffer, layer)` must return the Fourier buffer of `layer`)
   template <typename Func>
   void transformOutput(Func func, GridBase<Real>& out) const;
 
   /// Initialize fourier buffers
   void initialize(UInt source_components, UInt out_components,
                   UInt source_buffer_size);
 
 protected:
   Grid<Real, trait::boundary_dimension> wavevectors;
   using BufferType = GridHermitian<Real, trait::boundary_dimension>;
   // buffers and engine are mutable so that the const transform methods can
   // use them
   mutable std::vector<BufferType> source_buffer;
   mutable std::vector<BufferType> out_buffer;
   mutable std::unique_ptr<FFTEngine> engine;
 };
 
 /* -------------------------------------------------------------------------- */
 /* Template implementation */
 /* -------------------------------------------------------------------------- */
 
 /// Run the engine's backward transform for every layer of `out`:
 /// `func(out_buffer, layer)` supplies the Fourier buffer of a layer, the
 /// result is written into the matching view of `out`
 template <model_type type>
 template <typename Func>
 void VolumePotential<type>::transformOutput(Func func,
                                             GridBase<Real>& out) const {
   // the output must really be a grid of the model dimension (throws if not)
   auto& real_output = dynamic_cast<Grid<Real, trait::dimension>&>(out);
 
   // first size of the grid = number of layers
   for (UInt layer : Loop::range(real_output.sizes().front())) {
     auto layer_view = make_view(real_output, layer);
     auto& spectral_layer = func(out_buffer, layer);
     engine->backward(layer_view, spectral_layer);
   }
 }
 
 }  // namespace tamaas
 
 #endif  // VOLUME_POTENTIAL_HH
diff --git a/src/model/westergaard.hh b/src/model/westergaard.hh
index 284bb95..12b9294 100644
--- a/src/model/westergaard.hh
+++ b/src/model/westergaard.hh
@@ -1,91 +1,91 @@
 /**
  *  @file
  *  LICENSE
  *
  *  Copyright (©) 2016-2021 EPFL (École Polytechnique Fédérale de Lausanne),
  *  Laboratory (LSMS - Laboratoire de Simulation en Mécanique des Solides)
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU Affero General Public License as published
  *  by the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU Affero General Public License for more details.
  *
  *  You should have received a copy of the GNU Affero General Public License
  *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
  *
  */
 /* -------------------------------------------------------------------------- */
 #ifndef WESTERGAARD_HH
 #define WESTERGAARD_HH
 /* -------------------------------------------------------------------------- */
 #include "grid_hermitian.hh"
 #include "integral_operator.hh"
 #include "model_type.hh"
-#include "fftw_engine.hh"
+#include "fft_engine.hh"
 #include "tamaas.hh"
 /* -------------------------------------------------------------------------- */
 
 namespace tamaas {
 
 /**
  * @brief Operator based on Westergaard solution and the Discrete Fourier
  * Transform.
  * This class is templated with model type to allow efficient storage of the
  * influence coefficients.
  * The integral operator is only applied to surface pressure/displacements,
  * even for volume models.
  *
  * @tparam mtype model type, reported by getType()
  * @tparam otype operator kind, reported by getKind()
  */
 template <model_type mtype, IntegralOperator::kind otype>
 class Westergaard : public IntegralOperator {
   using trait = model_type_traits<mtype>;
   static constexpr UInt dim = trait::dimension;
   static constexpr UInt bdim = trait::boundary_dimension;
   static constexpr UInt comp = trait::components;
 
 public:
   /// Constructor: initializes influence coefficients and allocates buffer
   Westergaard(Model* model);
 
   /// Get influence coefficients
   const GridHermitian<Real, bdim>& getInfluence() const { return influence; }
 
   /// Apply influence coefficients to input
   void apply(GridBase<Real>& input, GridBase<Real>& ouput) const override;
 
   /// Update the influence coefficients
   // simply recomputes them through initInfluence()
   void updateFromModel() override { initInfluence(); }
 
   /// Kind
   IntegralOperator::kind getKind() const override { return otype; }
 
   /// Type
   model_type getType() const override { return mtype; }
 
   /// Initialize influence coefficients
   void initInfluence();
 
   /// Initialize influence coefficients from a functor
   /// (NOTE(review): presumably evaluated per wavevector -- confirm in the
   /// implementation)
   template <typename Functor>
   void initFromFunctor(Functor func);
 
   /// Apply a functor in Fourier space
   template <typename Functor>
   void fourierApply(Functor func, GridBase<Real>& in,
                     GridBase<Real>& out) const;
 
   /// Compute L_2 norm of influence functions
   Real getOperatorNorm() override;
 
 public:
   // data members are public; buffer and engine are mutable so that the const
   // methods above can use them
   GridHermitian<Real, bdim> influence;
   mutable GridHermitian<Real, bdim> buffer;
   mutable std::unique_ptr<FFTEngine> engine;
 };
 
 }  // namespace tamaas
 
 #endif  // WESTERGAARD_HH